diff --git a/.github/workflows/k8s-ci.yml b/.github/workflows/k8s-ci.yml index c198531745f2..d5fd37044f05 100644 --- a/.github/workflows/k8s-ci.yml +++ b/.github/workflows/k8s-ci.yml @@ -88,7 +88,7 @@ jobs: - '.github/workflows/ci.yml' gie-k8s-failover-test: - 'interactive_engine/**' - - 'charts/ir-standalone/**' + - 'charts/gie-standalone/**' - name: Cpp Format and Lint Check run: | @@ -647,13 +647,16 @@ jobs: run: | cd ${GITHUB_WORKSPACE}/charts # create local persistent volume which contains graph data for test - kubectl apply -f ./ir-standalone/tools/pvc.yaml + kubectl apply -f ./gie-standalone/tools/pvc.yaml # create gie instance (compiler & executor & exp storage) - helm install test ./ir-standalone \ - --set image.repository=graphscope/interactive-experimental \ - --set image.tag=${SHORT_SHA} --set storageType=Experimental \ + helm install test ./gie-standalone \ + --set frontend.image.repository=graphscope/interactive-experimental \ + --set frontend.image.tag=${SHORT_SHA} \ + --set executor.image.repository=graphscope/interactive-experimental \ + --set executor.image.tag=${SHORT_SHA} \ + --set storageType=Experimental \ --set schemaConfig=expr_modern_schema.json \ - --set store.replicaCount=2 \ + --set executor.replicaCount=2 \ --set frontend.service.type=NodePort # run failover test - cd ${GITHUB_WORKSPACE}/interactive_engine/compiler && ./ir_k8s_failover_ci.sh default test-graphscope-store 2 1 + cd ${GITHUB_WORKSPACE}/interactive_engine/compiler && ./ir_k8s_failover_ci.sh default test-gie-standalone 2 1 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 010c2c883ea0..9a4fc555473d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -103,6 +103,8 @@ jobs: helm package ./graphscope # package graphscope-store helm package --dependency-update ./graphscope-store + # package gie-standalone + helm package --dependency-update ./gie-standalone # generate index.yaml helm repo index . 
--url ${{ env.HELM_CHARTS_URL }} @@ -121,4 +123,5 @@ jobs: assets: | charts/graphscope-${{ steps.tag.outputs.TAG }}.tgz:/charts/graphscope-${{ steps.tag.outputs.TAG }}.tgz charts/graphscope-store-${{ steps.tag.outputs.TAG }}.tgz:/charts/graphscope-store-${{ steps.tag.outputs.TAG }}.tgz + charts/gie-standalone-${{ steps.tag.outputs.TAG }}.tgz:/charts/gie-standalone-${{ steps.tag.outputs.TAG }}.tgz charts/index.yaml:/charts/index.yaml diff --git a/charts/ir-standalone/Chart.yaml b/charts/gie-standalone/Chart.yaml similarity index 53% rename from charts/ir-standalone/Chart.yaml rename to charts/gie-standalone/Chart.yaml index 771581c62692..67cddbce69ee 100644 --- a/charts/ir-standalone/Chart.yaml +++ b/charts/gie-standalone/Chart.yaml @@ -1,16 +1,18 @@ annotations: category: Database + licenses: Apache-2.0 apiVersion: v2 -appVersion: latest -description: Chart to create a GraphScope Store cluster (Ir on Vineyard) +appVersion: 3.4.0 +description: Chart to Deploy GIE on Vineyard Storage home: https://graphscope.io +icon: https://graphscope.io/assets/images/graphscope-logo.svg keywords: - GraphScope - database - cluster -name: graphscope-store +name: gie-standalone sources: - https://github.com/alibaba/GraphScope/tree/main/interactive_engine/ -version: v2 +version: 0.20.0 dependencies: diff --git a/charts/gie-standalone/README.md b/charts/gie-standalone/README.md new file mode 100644 index 000000000000..abb8c368791f --- /dev/null +++ b/charts/gie-standalone/README.md @@ -0,0 +1,173 @@ +# GIE Standalone Deployment +## Prerequisite +### etcd +``` +# TODO: the link needs to be updated after merging to main +kubectl apply -f https://raw.githubusercontent.com/shirly121/GraphScope/add_gie_deploy/charts/gie-standalone/tools/etcd.yaml +``` +### prepare graph data +- vineyard +``` +# sample data for tests is provided under the `gie-standalone` directory; just copy it to the target directory + +data +└── modern_graph + ├── created.csv + ├── knows.csv + ├── person.csv + └── 
software.csv + +helm pull graphscope/gie-standalone --untar +cp -r gie-standalone/data/ /tmp/data/ +``` +- experimental +``` +# experimental storage will create modern graph for tests by default, +# prepare your own data under the directories of `graph_schema` and `graph_data_bin` if needed. +cp -r graph_schema /tmp/data/ +cp -r graph_data_bin /tmp/data +``` +### prepare k8s volume +- download `pvc.yaml` +``` +# TODO: the link needs to be updated after merging to main +curl -O -S https://raw.githubusercontent.com/shirly121/GraphScope/add_gie_deploy/charts/gie-standalone/tools/pvc.yaml +``` +- config `pvc.yaml` +``` +hostPath: + path: /tmp/data # keep consistent with the directory where the graph data is located +``` +- create pvc and pv +``` +kubectl apply -f pvc.yaml +``` +## Getting Started +### start gie deployment +- from remote +``` +helm repo update +helm install <your-release-name> graphscope/gie-standalone +``` +- from local (for customized config) +``` +# download helm package +helm pull graphscope/gie-standalone --untar +helm install <your-release-name> gie-standalone +``` +### stop gie deployment +``` +helm delete <your-release-name> +``` +### get service endpoint +``` +# execute in advance if in minikube environment +minikube tunnel +# gremlin endpoint +kubectl describe svc <your-release-name>-gie-standalone-frontend | grep "Endpoints:" | awk -F' ' '{print $2}' +``` +## Customized Config +### download helm package +``` +helm pull graphscope/gie-standalone --untar + +gie-standalone/ +├── Chart.yaml +├── config +│   └── v6d_modern_loader.json +├── data +│   └── modern_graph +│   ├── created.csv +│   ├── knows.csv +│   ├── person.csv +│   └── software.csv +├── README.md +├── templates +│   ├── configmap.yaml +│   ├── frontend +│   │   ├── statefulset.yaml +│   │   └── svc.yaml +│   ├── _helpers.tpl +│   ├── serviceaccount.yaml +│   └── store +│   ├── statefulset.yaml +│   └── svc-headless.yaml +├── tools +│   ├── etcd.yaml +│   └── pvc.yaml +└── values.yaml +``` +### config `gie-standalone/values.yaml` +#### vineyard +- docker 
artifacts +``` +# docker artifacts for vineyard store +executor: + image: + registry: registry.cn-hongkong.aliyuncs.com + repository: graphscope/interactive-executor + tag: "0.20.0" + +# docker artifacts for frontend +frontend: + image: + registry: registry.cn-hongkong.aliyuncs.com + repository: graphscope/interactive-frontend + tag: "0.20.0" +``` +- load your own graph data +``` +# add your graph loader config under `gie-standalone/config` + +config +└── v6d_modern_loader.json + +# config the file name in `gie-standalone/values.yaml` +htapLoaderConfig: "v6d_modern_loader.json" +``` +#### common +``` +frontend: + replicaCount: 1 # frontend num + service: + gremlinPort: 8182 # gremlin service port + +executor: + replicaCount: 1 # executor num + +# job config +pegasusWorkerNum: 2 +pegasusTimeout: 240000 +pegasusBatchSize: 1024 +pegasusOutputCapacity: 16 + +# k8s volume to store graph data +existingClaim: "test-graphscope-store-pvc" + +# data path where the inner pod read graph data from +storeDataPath: "/tmp/data" +# hdfs path is supported in vineyard +# storeDataPath: "hdfs://{ip}:{port}" +``` +#### experimental +``` +# docker artifacts for experimental store +executor: + image: + registry: registry.cn-hongkong.aliyuncs.com + repository: graphscope/gie-exp-runtime + tag: "" + +# docker artifacts for frontend +frontend: + image: + registry: registry.cn-hongkong.aliyuncs.com + repository: graphscope/gie-exp-runtime + tag: "" + +# storage type +storageType: Experimental + +# schema needed by compiler, config in `gie-standalone/templates/configmap.yaml` +schemaConfig: "expr_modern_schema.json" +``` diff --git a/charts/gie-standalone/config/v6d_modern_loader.json b/charts/gie-standalone/config/v6d_modern_loader.json new file mode 100644 index 000000000000..a2e2381ccd19 --- /dev/null +++ b/charts/gie-standalone/config/v6d_modern_loader.json @@ -0,0 +1,33 @@ +{ + "vertices": [ + { + "data_path": "$STORE_DATA_PATH/modern_graph/person.csv", + "label": "person", + "options": 
"header_row=true&delimiter=|" + }, + { + "data_path": "$STORE_DATA_PATH/modern_graph/software.csv", + "label": "software", + "options": "header_row=true&delimiter=|" + } + ], + "edges": [ + { + "data_path": "$STORE_DATA_PATH/modern_graph/knows.csv", + "label": "knows", + "src_label": "person", + "dst_label": "person", + "options": "header_row=true&delimiter=|" + }, + { + "data_path": "$STORE_DATA_PATH/modern_graph/created.csv", + "label": "created", + "src_label": "person", + "dst_label": "software", + "options": "header_row=true&delimiter=|" + } + ], + "directed": 1, + "retain_oid": 1, + "generate_eid": 1 +} diff --git a/charts/gie-standalone/data/modern_graph/created.csv b/charts/gie-standalone/data/modern_graph/created.csv new file mode 100644 index 000000000000..cc8adfc525ec --- /dev/null +++ b/charts/gie-standalone/data/modern_graph/created.csv @@ -0,0 +1,5 @@ +src_id|dst_id|edge_id|weight +1|3|9|0.4 +4|5|10|1.0 +4|3|11|0.4 +6|3|12|0.2 diff --git a/charts/gie-standalone/data/modern_graph/knows.csv b/charts/gie-standalone/data/modern_graph/knows.csv new file mode 100644 index 000000000000..e1144cc2c717 --- /dev/null +++ b/charts/gie-standalone/data/modern_graph/knows.csv @@ -0,0 +1,3 @@ +src_id|dst_id|edge_id|weight +1|2|7|0.5 +1|4|8|1.0 diff --git a/charts/gie-standalone/data/modern_graph/person.csv b/charts/gie-standalone/data/modern_graph/person.csv new file mode 100644 index 000000000000..1ec20cd59a3e --- /dev/null +++ b/charts/gie-standalone/data/modern_graph/person.csv @@ -0,0 +1,5 @@ +id|name|age +2|vadas|27 +6|peter|35 +4|josh|32 +1|marko|29 diff --git a/charts/gie-standalone/data/modern_graph/software.csv b/charts/gie-standalone/data/modern_graph/software.csv new file mode 100644 index 000000000000..79c6d8f92ed3 --- /dev/null +++ b/charts/gie-standalone/data/modern_graph/software.csv @@ -0,0 +1,3 @@ +id|name|lang +3|lop|java +5|ripple|java diff --git a/charts/ir-standalone/templates/_helpers.tpl b/charts/gie-standalone/templates/_helpers.tpl similarity 
index 90% rename from charts/ir-standalone/templates/_helpers.tpl rename to charts/gie-standalone/templates/_helpers.tpl index 8f0f9033c6e6..964cf43776c1 100644 --- a/charts/ir-standalone/templates/_helpers.tpl +++ b/charts/gie-standalone/templates/_helpers.tpl @@ -71,11 +71,39 @@ app.kubernetes.io/instance: {{ .Release.Name }} {{- end }} {{/* -Return the proper graphscope-store image name +Return the proper graphscope-store frontend image name */}} -{{- define "graphscope-store.image" -}} -{{ include "graphscope-store.images.image" . }} +{{- define "graphscope-store.frontend.image" -}} +{{- $tag := .Chart.AppVersion | toString -}} +{{- with .Values.frontend.image -}} +{{- if .tag -}} +{{- $tag = .tag | toString -}} +{{- end -}} +{{- if .registry -}} +{{- printf "%s/%s:%s" .registry .repository $tag -}} +{{- else -}} +{{- printf "%s:%s" .repository $tag -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Return the proper graphscope-store store image name +*/}} +{{- define "graphscope-store.store.image" -}} +{{- $tag := .Chart.AppVersion | toString -}} +{{- with .Values.executor.image -}} +{{- if .tag -}} +{{- $tag = .tag | toString -}} +{{- end -}} +{{- if .registry -}} +{{- printf "%s/%s:%s" .registry .repository $tag -}} +{{- else -}} +{{- printf "%s:%s" .repository $tag -}} +{{- end -}} {{- end -}} +{{- end -}} + {{/* Create the name of the service account to use @@ -124,25 +152,6 @@ Usage: {{- end -}} -{{/* -Return the proper image name -{{ include "graphscope-store.images.image" . 
}} -*/}} -{{- define "graphscope-store.images.image" -}} -{{- $tag := .Chart.AppVersion | toString -}} -{{- with .Values.image -}} -{{- if .tag -}} -{{- $tag = .tag | toString -}} -{{- end -}} -{{- if .registry -}} -{{- printf "%s/%s:%s" .registry .repository $tag -}} -{{- else -}} -{{- printf "%s:%s" .repository $tag -}} -{{- end -}} -{{- end -}} -{{- end -}} - - {{/* Return the proper Storage Class {{ include "graphscope-store.storage.class" .Values.path.to.the.persistence }} diff --git a/charts/ir-standalone/templates/configmap.yaml b/charts/gie-standalone/templates/configmap.yaml similarity index 83% rename from charts/ir-standalone/templates/configmap.yaml rename to charts/gie-standalone/templates/configmap.yaml index 933143c146dd..e76507ec9910 100644 --- a/charts/ir-standalone/templates/configmap.yaml +++ b/charts/gie-standalone/templates/configmap.yaml @@ -13,212 +13,6 @@ metadata: annotations: {{- include "common.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} {{- end }} data: - v6d_modern_loader.json: |- - { - "vertices": [ - { - "data_path": "$STORE_DATA_PATH/modern_graph/person.csv", - "label": "person", - "options": "header_row=true&delimiter=|" - }, - { - "data_path": "$STORE_DATA_PATH/modern_graph/software.csv", - "label": "software", - "options": "header_row=true&delimiter=|" - } - ], - "edges": [ - { - "data_path": "$STORE_DATA_PATH/modern_graph/knows.csv", - "label": "knows", - "src_label": "person", - "dst_label": "person", - "options": "header_row=true&delimiter=|" - }, - { - "data_path": "$STORE_DATA_PATH/modern_graph/created.csv", - "label": "created", - "src_label": "person", - "dst_label": "software", - "options": "header_row=true&delimiter=|" - } - ], - "directed": 1, - "generate_eid": 1 - } - v6d_modern_schema.json: |- - { - "partitionNum": 1, - "types": [ - { - "id": 0, - "indexes": [ - { - "propertyNames": [ - "id" - ] - } - ], - "label": "person", - "mapping": "[6,1,4]", - "propertyDefList": [ - { 
- "data_type": "STRING", - "id": 6, - "name": "name" - }, - { - "data_type": "LONG", - "id": 1, - "name": "age" - }, - { - "data_type": "LONG", - "id": 4, - "name": "id" - } - ], - "rawRelationShips": [], - "reverse_mapping": "[-1,1,-1,-1,2,-1,0,-1]", - "type": "VERTEX", - "valid_properties": [ - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1 - ] - }, - { - "id": 1, - "indexes": [ - { - "propertyNames": [ - "id" - ] - } - ], - "label": "software", - "mapping": "[6,5,4]", - "propertyDefList": [ - { - "data_type": "STRING", - "id": 6, - "name": "name" - }, - { - "data_type": "STRING", - "id": 5, - "name": "lang" - }, - { - "data_type": "LONG", - "id": 4, - "name": "id" - } - ], - "rawRelationShips": [], - "reverse_mapping": "[-1,-1,-1,-1,2,1,0,-1]", - "type": "VERTEX", - "valid_properties": [ - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1 - ] - }, - { - "id": 2, - "indexes": [], - "label": "knows", - "mapping": "[3,2,7]", - "propertyDefList": [ - { - "data_type": "LONG", - "id": 3, - "name": "eid" - }, - { - "data_type": "LONG", - "id": 2, - "name": "edge_id" - }, - { - "data_type": "DOUBLE", - "id": 7, - "name": "weight" - } - ], - "rawRelationShips": [ - { - "dstVertexLabel": "person", - "srcVertexLabel": "person" - } - ], - "reverse_mapping": "[-1,-1,1,0,-1,-1,-1,2]", - "type": "EDGE", - "valid_properties": [ - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1 - ] - }, - { - "id": 3, - "indexes": [], - "label": "created", - "mapping": "[3,2,7]", - "propertyDefList": [ - { - "data_type": "LONG", - "id": 3, - "name": "eid" - }, - { - "data_type": "LONG", - "id": 2, - "name": "edge_id" - }, - { - "data_type": "DOUBLE", - "id": 7, - "name": "weight" - } - ], - "rawRelationShips": [ - { - "dstVertexLabel": "software", - "srcVertexLabel": "person" - } - ], - "reverse_mapping": "[-1,-1,1,0,-1,-1,-1,2]", - "type": "EDGE", - "valid_properties": [ - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1 - ] - } - ], - "uniquePropertyNames": "[\"age\",\"edge_id\",\"eid\",\"id\",\"lang\",\"name\",\"weight\"]" - } 
expr_modern_schema.json: |- { "entities": [ @@ -1225,4 +1019,5 @@ data: "is_table_id": true, "is_column_id": false } +{{ (.Files.Glob "config/**").AsConfig | indent 2 }} {{- end -}} diff --git a/charts/ir-standalone/templates/frontend/statefulset.yaml b/charts/gie-standalone/templates/frontend/statefulset.yaml similarity index 90% rename from charts/ir-standalone/templates/frontend/statefulset.yaml rename to charts/gie-standalone/templates/frontend/statefulset.yaml index d90afabb8167..f82329a5746c 100644 --- a/charts/ir-standalone/templates/frontend/statefulset.yaml +++ b/charts/gie-standalone/templates/frontend/statefulset.yaml @@ -52,8 +52,8 @@ spec: {{- end }} containers: - name: frontend - image: {{ include "graphscope-store.image" . }} - imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + image: {{ include "graphscope-store.frontend.image" . }} + imagePullPolicy: {{ .Values.frontend.image.pullPolicy | quote }} command: {{- if (eq "Vineyard" .Values.storageType) }} - /bin/bash @@ -73,10 +73,15 @@ spec: done runtime_hosts=${runtime_hosts:1} - config_path=/etc/groot/config + json_file=`ls /tmp/*.json` + while [ $? -ne 0 ] + do + sleep 1s + json_file=`ls /tmp/*.json` + done $GRAPHSCOPE_HOME/bin/giectl start_frontend ${GRAPHSCOPE_RUNTIME} ${object_id} \ - $config_path/$GRAPH_SCHEMA $runtime_hosts $GREMLIN_SERVER_PORT $EXTRA_CONFIG + $json_file $runtime_hosts $GREMLIN_SERVER_PORT $EXTRA_CONFIG exit_code=$? 
while [ $exit_code -eq 0 ] @@ -95,15 +100,17 @@ spec: {{- end }} env: - name: GAIA_RPC_PORT - value: {{ .Values.store.service.gaiaRpc | quote }} + value: {{ .Values.executor.service.gaiaRpc | quote }} - name: GREMLIN_SERVER_PORT value: {{ .Values.frontend.service.gremlinPort | quote }} - name: DNS_NAME_PREFIX_STORE value: {{ $storeFullname }}-{}.{{ $storeFullname }}-headless.{{ $releaseNamespace }}.svc.{{ $clusterDomain }} - name: SERVERSSIZE - value: {{ .Values.store.replicaCount | quote }} + value: {{ .Values.executor.replicaCount | quote }} + {{- if (eq "Experimental" .Values.storageType) }} - name: GRAPH_SCHEMA value: {{ .Values.schemaConfig | quote }} + {{- end }} - name: EXTRA_CONFIG value: {{ .Values.extraConfig | quote }} - name: WORKER_NUM diff --git a/charts/ir-standalone/templates/frontend/svc.yaml b/charts/gie-standalone/templates/frontend/svc.yaml similarity index 100% rename from charts/ir-standalone/templates/frontend/svc.yaml rename to charts/gie-standalone/templates/frontend/svc.yaml diff --git a/charts/ir-standalone/templates/serviceaccount.yaml b/charts/gie-standalone/templates/serviceaccount.yaml similarity index 100% rename from charts/ir-standalone/templates/serviceaccount.yaml rename to charts/gie-standalone/templates/serviceaccount.yaml diff --git a/charts/ir-standalone/templates/store/statefulset.yaml b/charts/gie-standalone/templates/store/statefulset.yaml similarity index 68% rename from charts/ir-standalone/templates/store/statefulset.yaml rename to charts/gie-standalone/templates/store/statefulset.yaml index db83f5473287..1eef24c6b744 100644 --- a/charts/ir-standalone/templates/store/statefulset.yaml +++ b/charts/gie-standalone/templates/store/statefulset.yaml @@ -17,21 +17,21 @@ metadata: annotations: {{- include "graphscope-store.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} {{- end }} spec: - replicas: {{ .Values.store.replicaCount }} + replicas: {{ .Values.executor.replicaCount }} selector: 
matchLabels: {{ include "graphscope-store.selectorLabels" . | nindent 6 }} app.kubernetes.io/component: store serviceName: {{ include "graphscope-store.store.fullname" . }}-headless updateStrategy: - type: {{ .Values.store.updateStrategy }} - {{- if (eq "Recreate" .Values.store.updateStrategy) }} + type: {{ .Values.executor.updateStrategy }} + {{- if (eq "Recreate" .Values.executor.updateStrategy) }} rollingUpdate: null {{- end }} template: metadata: annotations: - {{- if .Values.store.podAnnotations }} - {{- include "graphscope-store.tplvalues.render" (dict "value" .Values.store.podAnnotations "context" $) | nindent 8 }} + {{- if .Values.executor.podAnnotations }} + {{- include "graphscope-store.tplvalues.render" (dict "value" .Values.executor.podAnnotations "context" $) | nindent 8 }} {{- end }} labels: {{- include "graphscope-store.labels" . | nindent 8 }} app.kubernetes.io/component: store @@ -43,17 +43,17 @@ spec: imagePullSecrets: {{- toYaml . | nindent 8 }} {{- end }} serviceAccountName: {{ include "graphscope-store.serviceAccountName" . }} - {{- if .Values.store.affinity }} - affinity: {{- include "graphscope-store.tplvalues.render" (dict "value" .Values.store.affinity "context" $) | nindent 8 }} + {{- if .Values.executor.affinity }} + affinity: {{- include "graphscope-store.tplvalues.render" (dict "value" .Values.executor.affinity "context" $) | nindent 8 }} {{- end }} initContainers: - {{- if .Values.store.initContainers }} - {{- include "graphscope-store.tplvalues.render" (dict "value" .Values.store.initContainers "context" $) | nindent 8 }} + {{- if .Values.executor.initContainers }} + {{- include "graphscope-store.tplvalues.render" (dict "value" .Values.executor.initContainers "context" $) | nindent 8 }} {{- end }} containers: - name: engine - image: {{ include "graphscope-store.image" . }} - imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + image: {{ include "graphscope-store.store.image" . 
}} + imagePullPolicy: {{ .Values.executor.image.pullPolicy | quote }} command: {{- if (eq "Vineyard" .Values.storageType) }} - /bin/bash @@ -61,12 +61,9 @@ spec: - | sleep 5s - chmod +x $GRAPHSCOPE_HOME/bin/kube_ssh - sudo cp $GRAPHSCOPE_HOME/bin/kube_ssh /usr/bin/kube_ssh - hosts=/tmp/hosts_of_nodes rm -rf $hosts && touch $hosts - pod_ips=`kubectl get pod -lapp.kubernetes.io/component=store -o jsonpath='{.items[*].status.podIP}'` + pod_ips=`kubectl get pod -lapp.kubernetes.io/component=store,app.kubernetes.io/instance=$INSTANCE_NAME -o jsonpath='{.items[*].status.podIP}'` pod_names="" for ip in `echo $pod_ips` do @@ -80,15 +77,12 @@ spec: export VINEYARD_IPC_SOCKET=/tmp/vineyard.sock if [ $SERVERSSIZE -eq 1 ] then - $GRAPHSCOPE_HOME/../vineyard/vineyardd --socket=${VINEYARD_IPC_SOCKET} --meta=local & + vineyardd --socket=${VINEYARD_IPC_SOCKET} --meta=local & else - $GRAPHSCOPE_HOME/../vineyard/vineyardd --socket=${VINEYARD_IPC_SOCKET} --etcd_endpoint=$ETCD_ENDPOINT & + vineyardd --socket=${VINEYARD_IPC_SOCKET} --etcd_endpoint=$ETCD_ENDPOINT & fi # load vineyard graph - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(python3 -c "import os; import vineyard; print(os.path.dirname(vineyard.__file__))") - export OPAL_PREFIX=$(python3 -c "import os; import graphscope; print(os.path.abspath(os.path.dirname(graphscope.__file__) + '/../graphscope.runtime/openmpi'))") - config_path=/etc/groot/config [[ `hostname` =~ -([0-9]+)$ ]] || exit 1 @@ -101,7 +95,7 @@ spec: -x OPAL_PREFIX=$OPAL_PREFIX \ -x VINEYARD_IPC_SOCKET=$VINEYARD_IPC_SOCKET \ -x config_path=$config_path \ - $GRAPHSCOPE_HOME/bin/vineyard_htap_loader --config $config_path/$HTAP_LOADER_CONFIG + vineyard-graph-loader --config $config_path/$HTAP_LOADER_CONFIG if [ $? 
-ne 0 ] then echo "load vineyard graph fail" @@ -117,6 +111,16 @@ spec: done object_id=${json_file//[^0-9]/} + # write schema json to remote frontend + if [ $server_id -eq 0 ] + then + frontend_pods=`kubectl get pod -lapp.kubernetes.io/component=frontend,app.kubernetes.io/instance=$INSTANCE_NAME --no-headers -o custom-columns=":metadata.name"` + for pod in $(echo $frontend_pods) + do + kubectl cp $json_file $pod:$json_file + done + fi + # start engine service pegasus_hosts="" i=0 @@ -146,65 +150,69 @@ spec: {{- end }} env: - name: GAIA_RPC_PORT - value: {{ .Values.store.service.gaiaRpc | quote }} + value: {{ .Values.executor.service.gaiaRpc | quote }} - name: GAIA_ENGINE_PORT - value: {{ .Values.store.service.gaiaEngine | quote }} + value: {{ .Values.executor.service.gaiaEngine | quote }} - name: STORE_DATA_PATH value: {{ .Values.storeDataPath | quote }} - name: DNS_NAME_PREFIX_STORE value: {{ $storeFullname }}-{}.{{ $storeFullname }}-headless.{{ $releaseNamespace }}.svc.{{ $clusterDomain }} - name: SERVERSSIZE - value: {{ .Values.store.replicaCount | quote }} + value: {{ .Values.executor.replicaCount | quote }} {{- if (eq "Vineyard" .Values.storageType) }} - name: ETCD_ENDPOINT value: {{ .Values.etcdEndpoint | quote }} - name: HTAP_LOADER_CONFIG value: {{ .Values.htapLoaderConfig | quote }} + - name: INSTANCE_NAME + value: {{ .Release.Name | quote }} {{- end }} ports: - name: gaia-rpc - containerPort: {{ .Values.store.service.gaiaRpc }} + containerPort: {{ .Values.executor.service.gaiaRpc }} - name: gaia-engine - containerPort: {{ .Values.store.service.gaiaEngine }} - {{- if .Values.store.resources }} - resources: {{- toYaml .Values.store.resources | nindent 12 }} + containerPort: {{ .Values.executor.service.gaiaEngine }} + {{- if .Values.executor.resources }} + resources: {{- toYaml .Values.executor.resources | nindent 12 }} {{- end }} volumeMounts: + {{- if eq (hasPrefix "hdfs" .Values.storeDataPath) false }} - name: data mountPath: {{ .Values.storeDataPath }} + 
{{- end }} - name: config mountPath: /etc/groot/config volumes: - name: config configMap: name: {{ include "graphscope-store.configmapName" . }} - {{- if and .Values.store.persistence.enabled .Values.store.persistence.existingClaim }} + {{- if and .Values.executor.persistence.enabled .Values.executor.persistence.existingClaim }} - name: data persistentVolumeClaim: - claimName: {{ tpl .Values.store.persistence.existingClaim . }} - {{- else if not .Values.store.persistence.enabled }} + claimName: {{ tpl .Values.executor.persistence.existingClaim . }} + {{- else if not .Values.executor.persistence.enabled }} - name: data emptyDir: {} - {{- else if and .Values.store.persistence.enabled (not .Values.store.persistence.existingClaim) }} + {{- else if and .Values.executor.persistence.enabled (not .Values.executor.persistence.existingClaim) }} volumeClaimTemplates: - metadata: name: data labels: {{ include "graphscope-store.selectorLabels" . | nindent 10 }} app.kubernetes.io/component: store - {{- if .Values.store.persistence.annotations }} + {{- if .Values.executor.persistence.annotations }} annotations: - {{- toYaml .Values.store.persistence.annotations | nindent 10 }} + {{- toYaml .Values.executor.persistence.annotations | nindent 10 }} {{- end }} spec: accessModes: - {{- range .Values.store.persistence.accessModes }} + {{- range .Values.executor.persistence.accessModes }} - {{ . 
| quote }} {{- end }} resources: requests: - storage: {{ .Values.store.persistence.size | quote }} - {{ include "graphscope-store.storage.class" .Values.store.persistence }} - {{- if .Values.store.persistence.selector }} - selector: {{- include "graphscope-store.tplvalues.render" (dict "value" .Values.store.persistence.selector "context" $) | nindent 10 }} + storage: {{ .Values.executor.persistence.size | quote }} + {{ include "graphscope-store.storage.class" .Values.executor.persistence }} + {{- if .Values.executor.persistence.selector }} + selector: {{- include "graphscope-store.tplvalues.render" (dict "value" .Values.executor.persistence.selector "context" $) | nindent 10 }} {{- end -}} {{- end }} diff --git a/charts/ir-standalone/templates/store/svc-headless.yaml b/charts/gie-standalone/templates/store/svc-headless.yaml similarity index 89% rename from charts/ir-standalone/templates/store/svc-headless.yaml rename to charts/gie-standalone/templates/store/svc-headless.yaml index 3fb491627c87..4ac53c201e2c 100644 --- a/charts/ir-standalone/templates/store/svc-headless.yaml +++ b/charts/gie-standalone/templates/store/svc-headless.yaml @@ -18,10 +18,10 @@ spec: publishNotReadyAddresses: true ports: - name: gaia-rpc - port: {{ .Values.store.service.gaiaRpc }} + port: {{ .Values.executor.service.gaiaRpc }} targetPort: gaia-rpc - name: gaia-engine - port: {{ .Values.store.service.gaiaEngine }} + port: {{ .Values.executor.service.gaiaEngine }} targetPort: gaia-engine selector: {{- include "graphscope-store.selectorLabels" . 
| nindent 4 }} app.kubernetes.io/component: store diff --git a/charts/ir-standalone/tools/etcd.yaml b/charts/gie-standalone/tools/etcd.yaml similarity index 100% rename from charts/ir-standalone/tools/etcd.yaml rename to charts/gie-standalone/tools/etcd.yaml diff --git a/charts/ir-standalone/tools/pvc.yaml b/charts/gie-standalone/tools/pvc.yaml similarity index 100% rename from charts/ir-standalone/tools/pvc.yaml rename to charts/gie-standalone/tools/pvc.yaml diff --git a/charts/ir-standalone/values.yaml b/charts/gie-standalone/values.yaml similarity index 87% rename from charts/ir-standalone/values.yaml rename to charts/gie-standalone/values.yaml index b0a4d0b0cdd0..f7380a1a3cba 100644 --- a/charts/ir-standalone/values.yaml +++ b/charts/gie-standalone/values.yaml @@ -2,24 +2,6 @@ # This is a YAML-formatted file. # Declare variables to be passed into your templates. -image: - registry: registry.cn-hongkong.aliyuncs.com - repository: graphscope/gie-exp-runtime - # Overrides the image tag whose default is the chart appVersion. 
- tag: "" - ## Specify a imagePullPolicy - ## Defaults to 'Always' if image tag is 'latest', else set to 'IfNotPresent' - ## ref: http://kubernetes.io/docs/user-guide/images/#pre-pulling-images - ## - pullPolicy: IfNotPresent - ## Optionally specify an array of imagePullSecrets (secrets must be manually created in the namespace) - ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ - ## Example: - ## pullSecrets: - ## - myRegistryKeySecretName - ## - pullSecrets: [] - nameOverride: "" fullnameOverride: "" @@ -40,14 +22,17 @@ commonLabels: {} javaOpts: "" ## Vineyard or Experimental Storage -storageType: Experimental +storageType: Vineyard ## Store Config storeDataPath: "/tmp/data" -## Frontend Config +## schema needed by compiler when using experimental store schemaConfig: "expr_modern_schema.json" +## mappings from csv to data type in vineyard store +htapLoaderConfig: "v6d_modern_loader.json" + ## Pegasus Config pegasusWorkerNum: 2 @@ -57,9 +42,6 @@ pegasusBatchSize: 1024 pegasusOutputCapacity: 16 -## the following configurations are necessary when using vineyard store -htapLoaderConfig: "v6d_modern_loader.json" - ## need by vineyard in distributed env etcdEndpoint: "etcd-for-vineyard.default.svc.cluster.local:2379" @@ -90,7 +72,25 @@ serviceAccount: ## GraphScope Store parameters ## -store: +executor: + image: + registry: registry.cn-hongkong.aliyuncs.com + repository: graphscope/interactive-executor + # Overrides the image tag whose default is the chart appVersion. 
+ tag: "0.20.0" + ## Specify a imagePullPolicy + ## Defaults to 'Always' if image tag is 'latest', else set to 'IfNotPresent' + ## ref: http://kubernetes.io/docs/user-guide/images/#pre-pulling-images + ## + pullPolicy: IfNotPresent + ## Optionally specify an array of imagePullSecrets (secrets must be manually created in the namespace) + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ + ## Example: + ## pullSecrets: + ## - myRegistryKeySecretName + ## + pullSecrets: [ ] + replicaCount: 1 ## updateStrategy for GraphScope Store statefulset @@ -256,6 +256,24 @@ store: ## GraphScope Frontend parameters ## frontend: + image: + registry: registry.cn-hongkong.aliyuncs.com + repository: graphscope/interactive-frontend + # Overrides the image tag whose default is the chart appVersion. + tag: "0.20.0" + ## Specify a imagePullPolicy + ## Defaults to 'Always' if image tag is 'latest', else set to 'IfNotPresent' + ## ref: http://kubernetes.io/docs/user-guide/images/#pre-pulling-images + ## + pullPolicy: IfNotPresent + ## Optionally specify an array of imagePullSecrets (secrets must be manually created in the namespace) + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ + ## Example: + ## pullSecrets: + ## - myRegistryKeySecretName + ## + pullSecrets: [ ] + replicaCount: 1 ## updateStrategy for GraphScope Store statefulset diff --git a/charts/ir-standalone/README.md b/charts/ir-standalone/README.md deleted file mode 100644 index 34e1d5b59254..000000000000 --- a/charts/ir-standalone/README.md +++ /dev/null @@ -1,98 +0,0 @@ -# IR Standalone Deployment -## Directory Structure -``` -├── ir-standalone -│   ├── Chart.yaml -│   ├── README.md -│   ├── etcd.yaml -│   ├── pvc.yaml -│   ├── templates -│   │   ├── _helpers.tpl -│   │   ├── configmap.yaml -│   │   ├── frontend -│   │   │   ├── statefulset.yaml -│   │   │   └── svc.yaml -│   │   ├── serviceaccount.yaml -│   │   └── store -│   │   ├── 
statefulset.yaml -│   │   └── svc-headless.yaml -│   └── values.yaml -└── role_and_binding.yaml -``` -## Prepare Dependencies (just initialize once) -### rbac authorization -``` -kubectl apply -f role_and_binding.yaml -``` -### etcd -``` -cd GraphScope/charts -kubectl apply -f ir-standalone/tools/etcd.yaml -``` -### prepare graph data -- prepare raw data -``` -# for vineyard store (there are some sample data for tests under the `resource` directory, just copy them to the target directory) -cp -r GraphScope/interactive_engine/tests/src/main/resources/ /tmp/data/ - -# for experimental store (experimental storage will create modern graph for tests by default, prepare your own raw data under the directories of graph_schema and graph_data_bin if you need other graph data for benchmark) -cp -r graph_schema /tmp/data/ -cp -r graph_data_bin /tmp/data -``` -- config `ir-standalone/pvc.yaml` -``` -hostPath: - path: /tmp/data # be consistent with the directory where the graph data is stored -``` -- create pvc and pv -``` -kubectl apply -f ir-standalone/tools/pvc.yaml -``` -## Getting Started -### config `ir-standalone/values.yaml` -``` -# docker artifacts -image: - registry: registry.cn-hongkong.aliyuncs.com - repository: graphscope/gie-exp-runtime - tag: "" - -# store num -store: - replicaCount: 1 - -# storage type: Experimental or Vineyard -storageType: Experimental - -# need by compiler service to access meta, the concrete content is in ir-standalone/templates/configmap.yaml -schemaConfig: "exp_modern_schema.json" - -# gremlin service port -gremlinPort: 12312 - -# Pegasus Config -pegasusWorkerNum: 2 -pegasusTimeout: 240000 -pegasusBatchSize: 1024 -pegasusOutputCapacity: 16 - -# pvc used by pod instance (default is the pvc created above) -existingClaim: "test-graphscope-store-pvc" - -# extra configurations if based on vineyard storage -htapLoaderConfig: "v6d_modern_loader.json" # need by vineyard instance to load raw data into in-memory graph structure, the concrete content 
is in ir-standalone/templates/configmap.yaml -``` -### start ir deployment -``` -helm install ir-standalone -``` -### stop ir deployment -``` -helm delete -``` -### get service endpoint -``` -minikube tunnel # execute in advance if in minikube environment - -kubectl get svc | grep frontend # EXTERNAL-IP:12312 -``` \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index 30190c113387..0342f23cc9f0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -59,6 +59,7 @@ and the vineyard store that offers efficient in-memory data transfers. :caption: Graph Interactive Engine interactive_engine/getting_started + interactive_engine/deployment interactive_engine/guide_and_examples interactive_engine/design_of_gie interactive_engine/supported_gremlin_steps diff --git a/docs/interactive_engine/deployment.md b/docs/interactive_engine/deployment.md index de462497cde1..3bdb77b2894f 100644 --- a/docs/interactive_engine/deployment.md +++ b/docs/interactive_engine/deployment.md @@ -1,2 +1,241 @@ -# Deployment for GIE +# Standalone Deployment for GIE + +We have demonstrated [how to execute interactive queries](./getting_started.md) easily by installing Graphscope via `pip` on a local machine. However, in real-life applications, graphs are often too large to fit on a single machine. In such cases, Graphscope can be deployed on a cluster, such as a [self-managed k8s cluster](../deploy_graphscope_on_self_managed_k8s.md), for processing large-scale graphs. But you may wonder, "what if I only need the GIE engine and not the whole package that includes GAE and GLE?" This tutorial will walk you through the process of standalone deployment of GIE on a self-managed k8s cluster. + +Throughout the tutorial, we assume all machines are running Linux system. +We do not guarantee that it works as smoothly as Linux on the other platform. +For your reference, we've tested the tutorial on Ubuntu 20.04. + +## The K8s Cluster +If you do not have a K8s cluster to work on, don't worry. 
We have four simple ways for you to create one and get started with the deployment: + +- Use a K8s cluster from Cloud Providers like [ACK](https://www.aliyun.com/product/kubernetes) from Alibaba Cloud. +- Create a K8s cluster using [kubeadm](https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/create-cluster-kubeadm/). +- Create a local K8s cluster using [minikube](https://minikube.sigs.k8s.io/docs/start/): + ```Bash + # Install `minikube` on your platform + # Recommend using `none` driver on a Linux machine to free from loading image to control plane. + # Check https://minikube.sigs.k8s.io/docs/handbook/pushing/ for details. + minikube start --driver=none + ``` +- Use a local k8s cluster in [docker desktop](https://docs.docker.com/desktop/kubernetes/). + +To learn more about the creation of a k8s cluster, please refer to the [official guide](https://kubernetes.io/zh-cn/docs/tutorials/kubernetes-basics/create-cluster/). + + +## Deploy Your First GIE Service + +The easiest way to deploy GIE standalone is by using Helm, which is a package manager for K8s that simplifies the +deployment and management of applications. To deploy GIE standalone using Helm, you can follow these steps: + +- Install Helm on your local machine if you do not have it by following the + instructions on the [official Helm website](https://helm.sh/docs/intro/install/). +- Pull the Helm repository to your local disk: + ```bash + helm pull graphscope/gie-standalone --untar + ``` +- Prepare the `etcd` pod. + ```bash + kubectl apply -f gie-standalone/tools/etcd.yaml + ``` +- Prepare graph data + ``` + cp -r gie-standalone/data /tmp/data/ + ``` + Check whether the raw data is there: + ``` + ls -l /tmp/data + ``` + You should be able to see the raw data of the [modern graph](https://tinkerpop.apache.org/docs/3.6.2/tutorials/getting-started/). 
+ ``` + data + └── modern_graph + ├── created.csv + ├── knows.csv + ├── person.csv + └── software.csv + ``` + Then create K8s persistent volume (PV) and persistent volume claim (PVC). + ```bash + kubectl apply -f gie-standalone/tools/pvc.yaml + ``` + The modern graph raw data in `/tmp/data` will be automatically loaded into the GIE graph store (by default on [Vineyard](https://v6d.io)). + + ```{tip} + You can load the data from any `/path/to/your/data`. All you need to do is copy the raw data to `/path/to/your/data` + and modify the `hostPath.path` in `gie-standalone/tools/pvc.yaml` to `/path/to/your/data`. + ``` + +- Install the GIE chart: + ``` + helm install [YOUR_RELEASE_NAME] gie-standalone + ``` +- Verify that the GIE service is running: + ``` + kubectl get pods + ``` + You should see the `[YOUR_RELEASE_NAME]-gie-standalone-frontend-0` and `[YOUR_RELEASE_NAME]-gie-standalone-store-0` pods running. + +- Get the endpoint of the GIE Frontend service: + ``` + kubectl describe svc [YOUR_RELEASE_NAME]-gie-standalone-frontend \ + | grep "Endpoints:" | awk -F' ' '{print $2}' + ``` + You should see the GIE Frontend service endpoint as `<ip>:<port>`. + +- Connect to the GIE frontend service using the official Python SDK or Gremlin console. + - From Python SDK. + ```Python + import sys + from gremlin_python import statics + from gremlin_python.structure.graph import Graph + from gremlin_python.process.graph_traversal import __ + from gremlin_python.process.strategies import * + from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection + + graph = Graph() + gremlin_endpoint = # the endpoint you've obtained from step 6. + remoteConn = DriverRemoteConnection('ws://' + gremlin_endpoint + '/gremlin','g') + g = graph.traversal().withRemote(remoteConn) + + res = g.V().count().next() + assert res == 6 + ``` + + - From Gremlin Console. + + Download Gremlin console and unpack to your local directory. 
+ ```bash + curl -LO https://dlcdn.apache.org/tinkerpop/3.6.2/apache-tinkerpop-gremlin-console-3.6.2-bin.zip && \ + unzip apache-tinkerpop-gremlin-console-3.6.2-bin.zip + ``` + + Modify the `hosts` and `port` to the GIE Frontend Service endpoint in + ```bash + apache-tinkerpop-gremlin-console-3.6.2-bin/conf/remote.yaml + ``` + + Then open the Gremlin console + ```bash + chmod +x apache-tinkerpop-gremlin-console-3.6.2-bin/bin/gremlin.sh + apache-tinkerpop-gremlin-console-3.6.2-bin/bin/gremlin.sh + ``` + + Type in the following: + ```bash + gremlin> :remote connect tinkerpop.server conf/remote.yaml + gremlin> :remote console + gremlin> g.V().count() + ==> 6 + gremlin> + ``` + + You are now ready to commit any Gremlin queries via either the Python SDK or Gremlin console. + +## Remove the GIE Service +```bash + helm uninstall [YOUR_RELEASE_NAME] +``` + + +## Using Your Own Data +Currently, a single instance of GIE can only handle one set of graph data. This means that you must +indicate which raw data should be uploaded into GIE's graph store, and all subsequent queries made +through the GIE instance will pertain to the uploaded graph. + +The above tutorial uses modern graph to demonstrate the launching procedural. However, it's easy to +specify your own data. To do so, you just need to provide a little specification about your data. 
+ +Let's look into the specification of modern graph in `gie-standalone/config/v6d_modern_loader.json`: +```json +{ + "vertices": [ + { + "data_path": "$STORE_DATA_PATH/modern_graph/person.csv", + "label": "person", + "options": "header_row=true&delimiter=|" + }, + { + "data_path": "$STORE_DATA_PATH/modern_graph/software.csv", + "label": "software", + "options": "header_row=true&delimiter=|" + } + ], + "edges": [ + { + "data_path": "$STORE_DATA_PATH/modern_graph/knows.csv", + "label": "knows", + "src_label": "person", + "dst_label": "person", + "options": "header_row=true&delimiter=|" + }, + { + "data_path": "$STORE_DATA_PATH/modern_graph/created.csv", + "label": "created", + "src_label": "person", + "dst_label": "software", + "options": "header_row=true&delimiter=|" + } + ], + "directed": 1, + "generate_eid": 1, + "string_oid": 0, + "local_vertex_map": 0, + "print_normalized_schema": 1 +} +``` + +There're a few things to notice: +- For now, we support loading raw data in CSV-like files. +- Prepare an individual file for each type of vertex and edge. For example, in the modern + graph, the data of "person" vertex is in the file of `modern_graph/person.csv`. +- Place the raw data in the `hostPath.path` specified above. +- For each type of vertex, configure + - `data_path`: as `hostPath.path`. The default value is `/tmp/data`. + - `label`: the label of the vertex. For example, "person", "software". + - `options`: configure as "key1=value1&key2=value2&...". Details can be found in this [guide](https://github.com/v6d-io/v6d/tree/main/modules/graph), while we provide some useful keys here: + - `header_row`: define whether the file contains a header, the default value is `false`. + - `delimiter`: the token that separates the data fields of a row of data, the default value is `','`. + - `column_types`: the data types of all data fields separated by the `delimiter`. 
If not specified, such as in the modern graph example, the store will attempt to infer the data types from the raw data. + You can also specify according to your need. For example, if there're two data fields, "field1" and "field2", you can specify `column_types=string,int64_t` to indicate their types. +- For each type of edge, configure + - `data_path`, `label`, `options` are similar to those of vertices. To save you from some + unexpected trouble, you'd better make the first two data fields record the ids of the source and destination vertices, and if `column_types` is given, the first two data fields are configured + to `int64_t` correspondingly. + - `src_label`: the label of the source vertex of this edge. + - `dst_label`: the label of the destination vertex of this edge. + +```{tip} +For your reference, we have provided a sample for loading LDBC data in `gie-standalone/config/v6d_ldbc_loader.json`. +``` + +## Deploy on a Cluster +In K8s, it’s convenient to deploy GIE in a cluster with multiple machines. +You don’t need to be aware of the physical machines, but simply configure the number of executors +to make GIE scalable. These GIE executors will be seamlessly assigned by K8s to the physical machines. + +You simply set the number of executors as: +``` +helm install [YOUR_RELEASE_NAME] graphscope/gie-standalone --set executor.replicaCount=3 +``` + +This instruction deploys the GIE chart using 3 executors that process graph partitions in v6d. +The number of replicas can be modified according to your needs, but better be less than the number +of CPUs in your cluster. When specifying the number of executors, v6d loads data from the specified +location and partitions graph data automatically for each executor. It is recommended to store data +in a distributed file system like `HDFS` for convenience. In this case, you can simply configure +the above `data_path` to use the `hdfs://` scheme. 
+ + +## Other Useful Configurations +Extra configurations can be set as: +```bash +helm install [YOUR_RELEASE_NAME] graphscope/gie-standalone --set [key1]=[value1],[key2]=[value2] +``` +We've listed useful configuration keys in the following: + +- gremlinPort: the port for accessing the Gremlin service (Default: 8182). +- pegasusWorkerNum: the number of working threads per each executor (Default: 2). + Obviously, the total number of working threads is: 'executor.replicaCount x pegasusWorkerNum' . +- pegasusTimeout: The maximum duration in `ms` you allow each query to run (Default: 24,000). diff --git a/docs/interactive_engine/design_of_gie.md b/docs/interactive_engine/design_of_gie.md index a356f617c94c..f0d5bc1847c8 100644 --- a/docs/interactive_engine/design_of_gie.md +++ b/docs/interactive_engine/design_of_gie.md @@ -36,7 +36,7 @@ role in massive data analysis. However, in practice, applying graph queries also :::{figure-md} -The current states of graph queries @@ -63,7 +63,7 @@ languages and N sets of code generation for N engines. :::{figure-md} -The architecture of GIE diff --git a/docs/interactive_engine/getting_started.md b/docs/interactive_engine/getting_started.md index 20efd05e707e..a58fbc879cee 100644 --- a/docs/interactive_engine/getting_started.md +++ b/docs/interactive_engine/getting_started.md @@ -89,13 +89,13 @@ You may see something like: The number 6 is printed, which is the number of vertices in modern graph. - ## What's the Next As shown in the above example, it is very easy to use GraphScope to interactively query a graph using the gremlin query language on your local machine. You may find more tutorials [here](https://tinkerpop.apache.org/docs/current/tutorials/getting-started/) for the basic Gremlin usage, in which most read-only queries can be seamlessly executed with the above `g.execute()` function. In addition to the above local-machine entr\'ee, we have prepared the following topics for your reference. 
-- GIE can process complex workloads such as the LDBC business intelligence workloads. [A walk-through tutorial is here](./ldbc_tutorial) -- GIE can work in a distributed environment to process very large graph. [How to do that?](./deployment) -- GIE has supported a lot of standard Gremlin steps, together with many useful syntactic sugars. [Please look into the details](./supported_gremlin_steps) -- Want to know more about the technical details of GIE. [This is the design and architecture of GIE](./design_of_gie) +- GIE can handle much more complex cases, for example, the complex LDBC + business intelligence workloads. [A walk-through tutorial is here!](./guide_and_examples) +- GIE can be deployed in a distributed environment to process very large graph. [How to do that?](./deployment) +- GIE has supported a lot of standard Gremlin steps, together with many useful syntactic sugars. [Please look into the details!](./supported_gremlin_steps) +- Want to learn more about the technical details of GIE. [This is the design and architecture of GIE!](./design_of_gie) diff --git a/docs/interactive_engine/tutorial_ldbc_gremlin.md b/docs/interactive_engine/tutorial_ldbc_gremlin.md index 91734c5fbe4e..727926840a86 100644 --- a/docs/interactive_engine/tutorial_ldbc_gremlin.md +++ b/docs/interactive_engine/tutorial_ldbc_gremlin.md @@ -37,6 +37,12 @@ graph = load_ldbc() This will load the LDBC social network with the scale factor (sf) 1. +```{tip} +We do permit loading much larger LDBC graphs, such as sf 3k. +To handle such large graphs, we recommend using the [standalone deployment of GIE](./deployment.md) +in a large cluster. +``` + Currently, GIE supports Gremlin as its query language. After loading the LDBC graph and initializing the engine, we can submit gremlin queries to GIE through `g.execute(GREMLIN_QUERIES)` easily.