Skip to content

Commit

Permalink
Enable deploy groot on local environment (#2769)
Browse files Browse the repository at this point in the history
- Enable deploying groot in a local environment
- Split data-load-tools from groot server
- Fix some unit tests of groot profile
  • Loading branch information
siyuan0322 committed May 30, 2023
1 parent 2509958 commit 983a41e
Show file tree
Hide file tree
Showing 22 changed files with 408 additions and 143 deletions.
22 changes: 12 additions & 10 deletions .github/workflows/gss.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,10 @@ jobs:
- name: Upload tools for helm test to Artifact
uses: actions/upload-artifact@v3
with:
name: groot.tar.gz
path: interactive_engine/assembly/target/groot.tar.gz
name: groot
path: |
interactive_engine/assembly/target/groot.tar.gz
interactive_engine/data-load-tool/target/data-load-tool-0.0.1-SNAPSHOT.jar
retention-days: 5

helm-test:
Expand All @@ -130,7 +132,7 @@ jobs:
- uses: actions/download-artifact@v3
with:
name: groot.tar.gz
name: groot
path: artifacts

- name: Set GITHUB_ENV
Expand All @@ -140,6 +142,9 @@ jobs:
- name: Prepare Image
run: |
ls -la artifacts/*/*
mv artifacts/assembly/target/groot.tar.gz artifacts/
mv artifacts/data-load-tool/target/data-load-tool-0.0.1-SNAPSHOT.jar artifacts/
docker build -t ${{ env.GSS_IMAGE }}:${SHORT_SHA} \
-f .github/workflows/docker/graphscope-store.Dockerfile .
Expand Down Expand Up @@ -170,10 +175,6 @@ jobs:
- name: Create the kubernetes cluster
run: |
# groot.tar.gz is needed for offline_load.sh
# see .github/workflows/hadoop_scripts/offline_load.sh.template
pushd artifacts
tar -zxf ./groot.tar.gz
# download gstest
git clone -b master --single-branch --depth=1 https://github.com/7br/gstest.git ${GS_TEST_DIR}
Expand Down Expand Up @@ -234,9 +235,10 @@ jobs:
./prepare_hadoop.sh /tmp/hadoop-2.10.1
export PATH=${PATH}:/tmp/hadoop-2.10.1/bin
REPLACE_STR=${GITHUB_WORKSPACE}/artifacts/groot
sed s/GROOT_HOME/${REPLACE_STR//\//\\/}/ offline_load.sh.template > offline_load.sh
chmod +x offline_load.sh
# data-load-tool is needed for offline_load.sh
# see .github/workflows/hadoop_scripts/offline_load.sh
export LOADER_DIR=${GITHUB_WORKSPACE}/artifacts
export LOAD_DATA_SCRIPT=${GITHUB_WORKSPACE}/.github/workflows/hadoop_scripts/offline_load.sh
sed s/GRAPH_ENDPOINT/$NODE_IP:$GRPC_PORT/ databuild.config.template > databuild.config
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/hadoop_scripts/offline_load.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
#
# Runs the groot offline data build (OfflineBuild) through `hadoop jar`.
#
# Environment:
#   LOADER_DIR  directory containing data-load-tool-0.0.1-SNAPSHOT.jar (required)
#
# The build configuration is read from databuild.config located in the same
# directory as this script.

# Directory of this script, so databuild.config is found regardless of the
# caller's working directory.
BASE_DIR=$(dirname "$0")

# Fail early with a clear message when LOADER_DIR is unset or empty; otherwise
# the jar path would silently collapse to "/data-load-tool-0.0.1-SNAPSHOT.jar".
: "${LOADER_DIR:?LOADER_DIR must point to the directory containing the data-load-tool jar}"

# Paths are quoted so directories containing whitespace are not word-split.
/tmp/hadoop-2.10.1/bin/hadoop jar "${LOADER_DIR}/data-load-tool-0.0.1-SNAPSHOT.jar" com.alibaba.graphscope.groot.dataload.databuild.OfflineBuild "${BASE_DIR}/databuild.config"

7 changes: 0 additions & 7 deletions .github/workflows/hadoop_scripts/offline_load.sh.template

This file was deleted.

2 changes: 1 addition & 1 deletion charts/graphscope-store/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -124,5 +124,5 @@ data:
export LOG_NAME=graphscope-store
export GROOT_CONF_FILE=/etc/groot/groot.config
${GRAPHSCOPE_HOME}/groot/bin/store_ctl.sh start_server ${ROLE}
${GRAPHSCOPE_HOME}/groot/bin/store_ctl.sh start ${ROLE}
{{- end -}}
13 changes: 11 additions & 2 deletions interactive_engine/assembly/groot.xml
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,25 @@

<dependencySets>
<dependencySet>
<scope>runtime</scope>
<outputDirectory>lib</outputDirectory>
<excludes>
<exclude>com.alibaba.graphscope:data-load-tool</exclude>
</excludes>
</dependencySet>
<dependencySet>
<outputDirectory>loader</outputDirectory>
<scope>test</scope>
<outputDirectory>lib</outputDirectory>
<includes>
<include>org.apache.curator:curator-test</include>
</includes>
</dependencySet>
<dependencySet>
<scope>provided</scope>
<includes>
<include>com.alibaba.graphscope:data-load-tool</include>
<include>org.scala-lang:scala-library</include>
</includes>
<outputDirectory>lib</outputDirectory>
</dependencySet>
</dependencySets>
</assembly>
6 changes: 3 additions & 3 deletions interactive_engine/assembly/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,17 @@
</dependency>
<dependency>
<groupId>com.alibaba.graphscope</groupId>
<artifactId>data-load-tool</artifactId>
<artifactId>executor</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.alibaba.graphscope</groupId>
<artifactId>executor</artifactId>
<artifactId>c-end-lgraph</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.alibaba.graphscope</groupId>
<artifactId>c-end-lgraph</artifactId>
<artifactId>data-load-tool</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
Expand Down
16 changes: 16 additions & 0 deletions interactive_engine/assembly/src/bin/groot/start_local_cluster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env bash
#
# Start groot as a local deployment.
#
# Renders ${GROOT_DIR}/conf/config.template into /tmp/groot.config, replacing
# the LOG4RS_CONFIG placeholder with the path ${GROOT_DIR}/conf/log4rs.yml,
# then invokes `store_ctl.sh start` with GROOT_CONF_FILE pointing at the
# rendered file.
#
# Environment:
#   GROOT_HOME  installation root of groot (default: /usr/local/groot)

# Abort on the first failure so the server is never started with a missing or
# empty config file (e.g. when config.template cannot be read).
set -euo pipefail

declare -r GROOT_DIR="${GROOT_HOME:-/usr/local/groot}"
declare -r CONFIG_FILE="/tmp/groot.config"

# '@' is used as the sed delimiter because the replacement is a path containing '/'.
sed "s@LOG4RS_CONFIG@${GROOT_DIR}/conf/log4rs.yml@" "${GROOT_DIR}/conf/config.template" > "${CONFIG_FILE}"

GROOT_CONF_FILE="${CONFIG_FILE}" "${GROOT_DIR}/bin/store_ctl.sh" start

# Example: start a container with port mapping
# docker run -p 12312:12312 -p 55556:55556 graphscope/graphscope-store:latest /usr/local/groot/bin/start_local_cluster.sh

# Example: client-side environment matching the port mapping above
# pip3 install graphscope_client --user
# export NODE_IP="127.0.0.1"
# export GREMLIN_PORT="12312"
# export GRPC_PORT="55556"
168 changes: 82 additions & 86 deletions interactive_engine/assembly/src/bin/groot/store_ctl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,93 +5,81 @@
set -xeo pipefail

usage() {
cat <<END
cat <<END
A script to launch groot service.
Usage: store_ctl.sh [options] [command] [parameters]
Usage: store_ctl.sh [options] [parameters]
Options:
-h, --help output help information
-h, --help Output help information
Commands:
start_max_node start max_node of gaia
start_server start individual groot server
start Start individual groot server. If no arguments given, start all servers as local deployment
END
}

# a function to setup common variable and env
_setup_env() {
declare script="$0"
if [ -z "${GROOT_HOME}" ]; then
# set GROOT_HOME base location of the script
while [ -h "${script}" ] ; do
ls=$(ls -ld "${script}")
# Drop everything prior to ->
link=$(expr "${ls}" : '.*-> \(.*\)$')
if expr "${link}" : '/.*' > /dev/null; then
script="${link}"
else
script="$(dirname ${script})/${link}"
fi
done
GROOT_HOME=$(dirname "${script}")
GROOT_HOME=$(cd "${GROOT_HOME}"; pwd)
readonly GROOT_HOME=$(dirname ${GROOT_HOME})
fi

if [ -z "${GROOT_CONF_DIR}" ]; then
readonly GROOT_CONF_DIR="${GROOT_HOME}/conf"
fi

if [ -z "${GROOT_LOGBACK_FILE}" ]; then
readonly GROOT_LOGBACK_FILE="${GROOT_CONF_DIR}/logback.xml"
fi

if [ -z "${GROOT_CONF_FILE}" ]; then
readonly GROOT_CONF_FILE="${GROOT_CONF_DIR}/groot.config"
fi

if [ -z "${LOG_NAME}" ]; then
readonly LOG_NAME="groot"
fi

export LD_LIBRARY_PATH=${GROOT_HOME}/native:${GROOT_HOME}/native/lib:${LD_LIBRARY_PATH}:/usr/local/lib

if [ -z "${LOG_DIR}" ]; then
GS_LOG="/var/log/graphscope"
if [[ ! -d "${GS_LOG}" || ! -w "${GS_LOG}" ]]; then
# /var/log/graphscope is not existed/writable, switch to ${HOME}/.local/log/graphscope
GS_LOG=${HOME}/.local/log/graphscope
fi
readonly GS_LOG
export LOG_DIR=${GS_LOG}
fi

mkdir -p ${LOG_DIR}

libpath="$(echo "${GROOT_HOME}"/lib/*.jar | tr ' ' ':')"
}

# start max_node of gaia
start_max_node() {
type=$1; shift
_setup_env

java -server \
-Dlogback.configurationFile="${GROOT_LOGBACK_FILE}" \
-Dconfig.file="${GROOT_CONF_FILE}" \
-Dlog.dir="${LOG_DIR}" \
-Dlog.name="${LOG_NAME}" \
-cp "${libpath}" com.alibaba.graphscope.groot.servers.MaxNode \
"$@" > >(tee -a "${LOG_DIR}/${LOG_NAME}.out") 2> >(tee -a "${LOG_DIR}/${LOG_NAME}.err" >&2)
declare script="$0"
if [ -z "${GROOT_HOME}" ]; then
# derive GROOT_HOME from the location of this script, resolving symlinks first
while [ -h "${script}" ]; do
ls=$(ls -ld "${script}")
# Drop everything prior to ->
link=$(expr "${ls}" : '.*-> \(.*\)$')
if expr "${link}" : '/.*' >/dev/null; then
script="${link}"
else
script="$(dirname ${script})/${link}"
fi
done
GROOT_HOME=$(dirname "${script}")
GROOT_HOME=$(
cd "${GROOT_HOME}"
pwd
)
readonly GROOT_HOME=$(dirname ${GROOT_HOME})
fi

if [ -z "${GROOT_CONF_DIR}" ]; then
readonly GROOT_CONF_DIR="${GROOT_HOME}/conf"
fi

if [ -z "${GROOT_LOGBACK_FILE}" ]; then
readonly GROOT_LOGBACK_FILE="${GROOT_CONF_DIR}/logback.xml"
fi

if [ -z "${GROOT_CONF_FILE}" ]; then
readonly GROOT_CONF_FILE="${GROOT_CONF_DIR}/groot.config"
fi

if [ -z "${LOG_NAME}" ]; then
readonly LOG_NAME="groot"
fi

export LD_LIBRARY_PATH=${GROOT_HOME}/native:${GROOT_HOME}/native/lib:${LD_LIBRARY_PATH}:/usr/local/lib

if [ -z "${LOG_DIR}" ]; then
GS_LOG="/var/log/graphscope"
if [[ ! -d "${GS_LOG}" || ! -w "${GS_LOG}" ]]; then
# /var/log/graphscope does not exist or is not writable; fall back to ${HOME}/.local/log/graphscope
GS_LOG=${HOME}/.local/log/graphscope
fi
readonly GS_LOG
export LOG_DIR=${GS_LOG}
fi

mkdir -p ${LOG_DIR}

libpath="$(echo "${GROOT_HOME}"/lib/*.jar | tr ' ' ':')"
}

# start groot server
start_server() {
_setup_env
java_opt="-server
_setup_env
java_opt="-server
-Djava.awt.headless=true
-Dfile.encoding=UTF-8
-Dsun.jnu.encoding=UTF-8
Expand All @@ -113,26 +101,34 @@ start_server() {
-XX:NumberOfGCLogFiles=32
-XX:GCLogFileSize=64m"

java ${java_opt} \
-Dlogback.configurationFile="${GROOT_LOGBACK_FILE}" \
-Dconfig.file="${GROOT_CONF_FILE}" \
-Dlog.dir="${LOG_DIR}" \
-Dlog.name="${LOG_NAME}" \
-cp "${libpath}" com.alibaba.graphscope.groot.servers.GrootGraph \
"$@" > >(tee -a "${LOG_DIR}/${LOG_NAME}.out") 2> >(tee -a "${LOG_DIR}/${LOG_NAME}.err" >&2)
java ${java_opt} \
-Dlogback.configurationFile="${GROOT_LOGBACK_FILE}" \
-Dconfig.file="${GROOT_CONF_FILE}" \
-Dlog.dir="${LOG_DIR}" \
-Dlog.name="${LOG_NAME}" \
-cp "${libpath}" com.alibaba.graphscope.groot.servers.GrootGraph \
"$@" > >(tee -a "${LOG_DIR}/${LOG_NAME}.out") 2> >(tee -a "${LOG_DIR}/${LOG_NAME}.err" >&2)
}

# parse argv
while test $# -ne 0; do
arg=$1; shift
case $arg in
-h|--help) usage; exit ;;
start_max_node) start_max_node "gaia" "$@"; exit;;
start_server) start_server "$@"; exit;;
*)
echo "unrecognized option or command '${arg}'"
usage; exit;;
esac
arg=$1
shift
case $arg in
-h | --help)
usage
exit
;;
start)
start_server "$@"
exit
;;
*)
echo "unrecognized option or command '${arg}'"
usage
exit
;;
esac
done

set +xeo pipefail
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class StoreDataBatch {
private String requestId;
Expand Down Expand Up @@ -96,15 +95,12 @@ public int getSize() {
if (this.size == -1) {
this.size =
this.dataBatch.stream()
.collect(
Collectors.summingInt(
partitionToBatch ->
partitionToBatch.values().stream()
.collect(
Collectors.summingInt(
batch ->
batch
.getOperationCount()))));
.mapToInt(
partitionToBatch ->
partitionToBatch.values().stream()
.mapToInt(OperationBatch::getOperationCount)
.sum())
.sum();
}
return this.size;
}
Expand Down
Loading

0 comments on commit 983a41e

Please sign in to comment.