Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable volume on data loading #2715

Merged
merged 9 commits into from
May 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/gss.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ jobs:
export RUSTC_WRAPPER=/usr/local/bin/sccache
sccache --start-server
cd ${GITHUB_WORKSPACE}/interactive_engine
mvn clean install -P groot,groot-assembly -Drust.compile.mode=debug -DskipTests -Dgroot.compile.feature="column_filter_push_down" --quiet
mvn clean install -P groot -Drust.compile.mode=debug -DskipTests -Dgroot.compile.feature="column_filter_push_down" --quiet
sccache --show-stats
Expand All @@ -92,7 +92,7 @@ jobs:
export SCCACHE_DIR=~/.cache/sccache
export RUSTC_WRAPPER=/usr/local/bin/sccache
cd ${GITHUB_WORKSPACE}/interactive_engine
mvn clean install -P groot,groot-assembly -Drust.compile.mode=debug -DskipTests --quiet
mvn clean install -P groot -Drust.compile.mode=debug -DskipTests --quiet
sccache --show-stats
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/k8s-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ jobs:
minikube image load graphscope/learning:${SHORT_SHA}
export PYTHONPATH=${GITHUB_WORKSPACE}/python:${PYTHONPATH}
cd ${GITHUB_WORKSPACE}/interactive_engine && mvn clean install --quiet -DskipTests -Drust.compile.skip=true -P graphscope,graphscope-assembly
cd ${GITHUB_WORKSPACE}/interactive_engine && mvn clean install --quiet -DskipTests -Drust.compile.skip=true -P graphscope
cd ${GITHUB_WORKSPACE}/interactive_engine/tests
# ./function_test.sh 8111 1
./function_test.sh 8112 2
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ interactive: $(INTERACTIVE_DIR)/assembly/target/graphscope.tar.gz

$(INTERACTIVE_DIR)/assembly/target/graphscope.tar.gz:
cd $(INTERACTIVE_DIR) && \
mvn package -DskipTests -Drust.compile.mode=$(BUILD_TYPE) -P graphscope,graphscope-assembly -Drevision=$(VERSION) --quiet
mvn package -DskipTests -Drust.compile.mode=$(BUILD_TYPE) -P graphscope -Drevision=$(VERSION) --quiet

learning-install: learning
mkdir -p $(INSTALL_PREFIX)
Expand Down
90 changes: 52 additions & 38 deletions docs/storage_engine/groot.md

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions interactive_engine/assembly/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
This will build graphscope or groot into an assembly archive.

# Usage
`mvn package -P graphscope,graphscope-assembly` will generate a graphscope.tar.gz under `target/`.
`mvn package -P groot,groot-assembly` will generate a groot.tar.gz under `target/`.
`mvn package -P graphscope` will generate a graphscope.tar.gz under `target/`.
`mvn package -P groot` will generate a groot.tar.gz under `target/`.
2 changes: 1 addition & 1 deletion interactive_engine/assembly/groot.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0 http://maven.apache.org/xsd/assembly-2.0.0.xsd">
<id>groot-assembly</id>
<id>groot</id>
<formats>
<format>tar.gz</format>
</formats>
Expand Down
4 changes: 2 additions & 2 deletions interactive_engine/assembly/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
</activation>
</profile>
<profile>
<id>graphscope-assembly</id>
<id>graphscope</id>
<build>
<plugins>
<plugin>
Expand Down Expand Up @@ -49,7 +49,7 @@
</dependencies>
</profile>
<profile>
<id>groot-assembly</id>
<id>groot</id>
<build>
<plugins>
<plugin>
Expand Down
16 changes: 2 additions & 14 deletions interactive_engine/assembly/src/bin/groot/store_ctl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@
#
# groot command tool

set -x
set -e
set -o pipefail
set -xeo pipefail

usage() {
cat <<END
Expand All @@ -20,7 +18,6 @@ cat <<END
start_max_node start max_node of gaia
start_server start individual groot server
start_load_tools start load_tools
END
}

Expand Down Expand Up @@ -91,12 +88,6 @@ start_max_node() {
"$@" > >(tee -a "${LOG_DIR}/${LOG_NAME}.out") 2> >(tee -a "${LOG_DIR}/${LOG_NAME}.err" >&2)
}

start_load_tools() {
_setup_env
java -cp "${GROOT_HOME}/lib/data-load-tool-0.0.1-SNAPSHOT.jar" \
com.alibaba.graphscope.groot.dataload.LoadTool "$@"
}

# start groot server
start_server() {
_setup_env
Expand Down Expand Up @@ -138,13 +129,10 @@ while test $# -ne 0; do
-h|--help) usage; exit ;;
start_max_node) start_max_node "gaia" "$@"; exit;;
start_server) start_server "$@"; exit;;
start_load_tools) start_load_tools "$@"; exit;;
*)
echo "unrecognized option or command '${arg}'"
usage; exit;;
esac
done

set +e
set +o pipefail
set +x
set +xeo pipefail
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package com.alibaba.graphscope.groot.common.config;

/**
 * Holder for the string constants that name data-loading configuration properties.
 *
 * <p>The class declares constants only; it has no methods and no state. Each constant's value
 * is the literal property key (for example {@code "graph.endpoint"}). The constants are
 * grouped by the section comments below.
 */
public class DataLoadConfig {

// ===== "Get property" keys =====
// NOTE(review): presumably these are keys read from user-supplied configuration,
// as opposed to the "Set property" keys further down -- confirm against callers.

// ---- Universal configurations ----
public static final String GRAPH_ENDPOINT = "graph.endpoint";

public static final String COLUMN_MAPPING_CONFIG = "column.mapping.config";

public static final String LOAD_AFTER_BUILD = "load.after.build";

public static final String SPLIT_SIZE = "split.size";

public static final String UNIQUE_PATH = "unique.path"; // generated automatically for each task
// Keys under the "auth." prefix: user name and password.
public static final String USER_NAME = "auth.username";
public static final String PASS_WORD = "auth.password";

// ---- Job on HDFS configurations ----

// Input and output
public static final String INPUT_PATH = "input.path";

public static final String OUTPUT_PATH = "output.path";
public static final String SEPARATOR = "separator";
public static final String SKIP_HEADER = "skip.header";
public static final String LDBC_CUSTOMIZE = "ldbc.customize";
// ---- end of HDFS job configurations ----

// ---- Job on ODPS configurations ----
public static final String DATA_SINK_TYPE = "data.sink.type"; // one of: hdfs, oss, volume
// The table format is `project.table` or `table`.
// For a partitioned table, the format is `project.table|p1=1/p2=2` or `table|p1=1/p2=2`.
public static final String OUTPUT_TABLE = "output.table"; // a dummy table
// ---- end of ODPS job configurations ----

// ===== "Set property" keys =====
// NOTE(review): presumably these are written by the loading job itself rather than
// supplied by the user -- confirm against callers.
public static final String SCHEMA_JSON = "schema.json";
public static final String COLUMN_MAPPINGS = "column.mappings";
public static final String META_INFO = "meta.info";

// NOTE(review): unlike the dotted keys above, this looks like a file name rather than
// a property key -- confirm where it is used.
public static final String META_FILE_NAME = "META";

// ---- OSS configurations ----
public static final String OSS_ENDPOINT = "oss.endpoint";

public static final String OSS_ACCESS_ID = "oss.access.id";
public static final String OSS_ACCESS_KEY = "oss.access.key";

public static final String OSS_BUCKET_NAME = "oss.bucket.name";
public static final String OSS_OBJECT_NAME = "oss.object.name";
public static final String OSS_INFO_URL = "oss.info.url";
// ---- end of OSS configurations ----

// ---- ODPS Volume configurations ----
public static final String ODPS_VOLUME_PROJECT = "odps.volume.project";

public static final String ODPS_VOLUME_NAME = "odps.volume.name";
public static final String ODPS_VOLUME_PARTSPEC = "odps.volume.partspec";

public static final String ODPS_ACCESS_ID = "odps.access.id";
public static final String ODPS_ACCESS_KEY = "odps.access.key";
public static final String ODPS_ENDPOINT = "odps.endpoint";
// ---- end of ODPS Volume configurations ----

}
Loading
Loading