/
adam-submit
executable file
·105 lines (92 loc) · 3.04 KB
/
adam-submit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env bash
#
# Licensed to Big Data Genomics (BDG) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The BDG licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# usage: adam-submit [<spark-args> --] <adam-args>
set -e

# Partition the command line around the first bare "--".
#   adam-submit <spark-args> -- <adam-args>  -> both groups populated
#   adam-submit <adam-args>                  -> everything belongs to ADAM
DD=False # DD is "double dash"; flips to True once a bare "--" is consumed
PRE_DD=()
POST_DD=()
while (( $# > 0 )); do
  ARG=$1
  shift
  case $ARG in
    --)
      # Whatever is still left on the command line follows the separator.
      DD=True
      POST_DD=( "$@" )
      break
      ;;
    *)
      PRE_DD+=( "$ARG" )
      ;;
  esac
done

# Without a separator there are no Spark arguments at all.
if [[ $DD != True ]]; then
  SPARK_ARGS=()
  ADAM_ARGS=( "${PRE_DD[@]}" )
else
  SPARK_ARGS=( "${PRE_DD[@]}" )
  ADAM_ARGS=( "${POST_DD[@]}" )
fi
# Does the user have ADAM_OPTS set? If yes, then warn.
# The warning is only issued when no "--" separator was given (DD is False)
# and ADAM_OPTS is non-empty. It is written to stderr, like the other
# diagnostics in this script, so it cannot mix into output that ADAM writes
# to stdout.
if [[ $DD == False && -n "$ADAM_OPTS" ]]; then
  echo "WARNING: Passing Spark arguments via ADAM_OPTS was recently removed." 1>&2
  echo "Run adam-submit instead as adam-submit <spark-args> -- <adam-args>" 1>&2
fi
# Figure out where ADAM is installed: the parent of the directory that holds
# this script. "$0" is quoted so install paths containing spaces work, and
# "&&" prevents pwd from reporting an unrelated directory if the cd fails.
SCRIPT_DIR="$(cd "$(dirname "$0")/.." && pwd)"

# Find ADAM cli assembly jar. A "repo" directory under the install root is
# used when present; otherwise fall back to adam-assembly/target.
ADAM_CLI_JAR=
if [ -d "$SCRIPT_DIR/repo" ]; then
  ASSEMBLY_DIR="$SCRIPT_DIR/repo"
else
  ASSEMBLY_DIR="$SCRIPT_DIR/adam-assembly/target"
fi

# Collect candidate jar names once, using a glob rather than parsing ls
# output. Each name must match the assembly naming pattern and must not
# contain "javadoc". If the glob matches nothing it stays a literal string
# containing "*", which the pattern rejects, so the list stays empty.
ASSEMBLY_JAR_PATTERN='^adam-assembly_[0-9A-Za-z.-]*\.jar$'
ASSEMBLY_JARS=()
for JAR_PATH in "$ASSEMBLY_DIR"/adam-assembly_*.jar; do
  JAR_NAME="${JAR_PATH##*/}"
  if [[ $JAR_NAME =~ $ASSEMBLY_JAR_PATTERN && $JAR_NAME != *javadoc* ]]; then
    ASSEMBLY_JARS+=("$JAR_NAME")
  fi
done
num_jars="${#ASSEMBLY_JARS[@]}"

if [ "$num_jars" -eq 0 ]; then
  echo "Failed to find ADAM cli assembly in $ASSEMBLY_DIR." 1>&2
  echo "You need to build ADAM before running this program." 1>&2
  exit 1
fi
if [ "$num_jars" -gt 1 ]; then
  echo "Found multiple ADAM cli assembly jars in $ASSEMBLY_DIR:" 1>&2
  printf '%s\n' "${ASSEMBLY_JARS[@]}" 1>&2
  echo "Please remove all but one jar." 1>&2
  exit 1
fi

# Exactly one candidate remains at this point.
ADAM_CLI_JAR="${ASSEMBLY_DIR}/${ASSEMBLY_JARS[0]}"
# The entry-point class may be supplied through the ADAM_MAIN environment
# variable; an unset or empty value selects the standard ADAM CLI main class.
ADAM_MAIN="${ADAM_MAIN:-org.bdgenomics.adam.cli.ADAMMain}"
echo "Using ADAM_MAIN=$ADAM_MAIN"
# Find spark-submit script: use $SPARK_HOME/bin/spark-submit when SPARK_HOME
# is set, otherwise look spark-submit up on PATH.
if [ -z "$SPARK_HOME" ]; then
  # command -v is a shell builtin, so the lookup does not depend on an
  # external which(1) being installed. "|| true" keeps "set -e" from
  # aborting here so the explicit error below is shown instead.
  SPARK_SUBMIT="$(command -v spark-submit || true)"
else
  SPARK_SUBMIT="$SPARK_HOME"/bin/spark-submit
fi
if [ -z "$SPARK_SUBMIT" ]; then
  # Errors go to stderr, matching the other failure paths in this script.
  echo "SPARK_HOME not set and spark-submit not on PATH; Aborting." 1>&2
  exit 1
fi
echo "Using SPARK_SUBMIT=$SPARK_SUBMIT"
# Submit the job to Spark.
# Argument order: the main class and the two Kryo --conf settings come first,
# then the user's Spark arguments, then the ADAM assembly jar, then the
# arguments destined for ADAM itself. Every expansion is quoted so that a
# user-supplied ADAM_MAIN or argument containing whitespace or glob
# characters is passed through as a single, unmodified word.
"$SPARK_SUBMIT" \
  --class "$ADAM_MAIN" \
  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
  --conf spark.kryo.registrator=org.bdgenomics.adam.serialization.ADAMKryoRegistrator \
  "${SPARK_ARGS[@]}" \
  "$ADAM_CLI_JAR" \
  "${ADAM_ARGS[@]}"