#!/usr/bin/env bash
set -e -x -v
# make a tempdir for writing maven cruft to
ADAM_MVN_TMP_DIR=$(mktemp -d -t adamTestMvnXXXXXXX)
# add this tempdir to the poms...
find . -name pom.xml \
    -exec sed -i.bak \
    -e "s:sun.io.serialization.extendedDebugInfo=true:sun.io.serialization.extendedDebugInfo=true -Djava.io.tmpdir=${ADAM_MVN_TMP_DIR}:g" \
    {} \;
find . -name "*.bak" -exec rm -f {} \;
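# (the sed above rewrites the JVM options configured in each pom so that, e.g.,
#   -Dsun.io.serialization.extendedDebugInfo=true
# becomes
#   -Dsun.io.serialization.extendedDebugInfo=true -Djava.io.tmpdir=${ADAM_MVN_TMP_DIR}
# pointing the temp files of forked test JVMs at our disposable tempdir)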
# variable declarations
export PATH=${JAVA_HOME}/bin/:${PATH}
export MAVEN_OPTS="-Xmx1536m -XX:MaxPermSize=1g -Dfile.encoding=utf-8"
DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
PROJECT_ROOT=${DIR}/..
VERSION=$(grep "<version>" ${PROJECT_ROOT}/pom.xml | head -2 | tail -1 | sed 's/ *<version>//g' | sed 's/<\/version>//g')
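# (head -2 | tail -1 grabs the second <version> tag in the top-level pom; the
# first is typically the parent pom's version, the second the project's own)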
# is the hadoop version set?
if ! [[ ${HADOOP_VERSION} ]];
then
    echo "HADOOP_VERSION environment variable is not set."
    echo "Please set this variable before running."
    exit 1
fi
# is the spark version set?
if ! [[ ${SPARK_VERSION} ]];
then
    echo "SPARK_VERSION environment variable is not set."
    echo "Please set this variable before running."
    exit 1
fi
set -e
# build defaults to Scala 2.11
if [[ ${SCALAVER} == 2.11 ]];
then
    # shouldn't be able to move to scala 2.11 twice
    set +e
    ./scripts/move_to_scala_2.11.sh
    if [[ $? == 0 ]];
    then
        echo "We have already moved to Scala 2.11, so running move_to_scala_2.11.sh a second time should fail, but error code was 0 (success)."
        exit 1
    fi
    set -e
fi
# move to Scala 2.12 if requested
if [[ ${SCALAVER} == 2.12 ]];
then
    set +e
    ./scripts/move_to_scala_2.12.sh
    set -e
fi
# print versions
echo "Testing ADAM version ${VERSION} on Spark ${SPARK_VERSION} and Hadoop ${HADOOP_VERSION}"
# first, clean the build, then run the unit tests and generate a coverage report
mvn clean \
    -Dhadoop.version=${HADOOP_VERSION}
# if this is a pull request, we need to set the coveralls pr id
if [[ -n $ghprbPullId ]];
then
    COVERALLS_PRB_OPTION="-DpullRequest=${ghprbPullId}"
fi
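# (ghprbPullId is set by the Jenkins GitHub Pull Request Builder plugin when
# the build was triggered by a pull request)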
# coveralls token should not be visible
set +x +v
if [[ -z ${COVERALLS_REPO_TOKEN} ]];
then
    echo "Coveralls token is not set. Exiting..."
    exit 1
fi
# run the unit tests and push the coverage report to coveralls
mvn -U \
    test \
    -P coverage,coveralls scoverage:report coveralls:report \
    -DrepoToken=${COVERALLS_REPO_TOKEN} ${COVERALLS_PRB_OPTION}
# make verbose again
set -x -v
# if those pass, build the distribution package
mvn -U \
    -P distribution \
    package \
    -DskipTests \
    -Dhadoop.version=${HADOOP_VERSION} \
    -DargLine=${ADAM_MVN_TMP_DIR}
# make sure that the distribution package contains an assembly jar
# if no assembly jar is found, this will exit with code 1 and fail the build
tar tzvf adam-distribution/target/adam-distribution*-bin.tar.gz | \
    grep adam-assembly | \
    grep jar | \
    grep -v -e sources -e javadoc
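# (grep exits non-zero when nothing matches, which trips set -e and fails the
# build if the assembly jar is missing from the tarball)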
# we are done with maven, so log the temp dir's contents, then delete it
find ${ADAM_MVN_TMP_DIR}
rm -rf ${ADAM_MVN_TMP_DIR}
# revert the java.io.tmpdir edit we made to the poms earlier
find . -name pom.xml \
    -exec sed -i.bak \
    -e "s:sun.io.serialization.extendedDebugInfo=true -Djava.io.tmpdir=${ADAM_MVN_TMP_DIR}:sun.io.serialization.extendedDebugInfo=true:g" \
    {} \;
find . -name "*.bak" -exec rm -f {} \;
# run integration tests
# make a temp directory
ADAM_TMP_DIR=$(mktemp -d -t adamTestXXXXXXX)
# Just to be paranoid, use a directory internal to the ADAM_TMP_DIR
ADAM_TMP_DIR=$ADAM_TMP_DIR/deleteMePleaseThisIsNoLongerNeeded
mkdir $ADAM_TMP_DIR
# set the TMPDIR envar, which is used by python to choose where to make temp directories
export TMPDIR=${ADAM_TMP_DIR}
pushd $PROJECT_ROOT
# Copy the full build tree (including the assembly jar) into our temp space for testing
cp -r . $ADAM_TMP_DIR
popd
pushd $ADAM_TMP_DIR
# map the Hadoop version to the suffix used in Spark's prebuilt tarball names
if [[ $HADOOP_VERSION =~ ^2\.6 ]]; then
    HADOOP=hadoop2.6
elif [[ $HADOOP_VERSION =~ ^2\.7 ]]; then
    HADOOP=hadoop2.7
else
    echo "Unknown Hadoop version."
    exit 1
fi
# set spark artifact string for downloading assembly
SPARK=spark-${SPARK_VERSION}
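# (e.g. SPARK_VERSION=2.4.3 yields SPARK=spark-2.4.3, matching upstream
# artifact names such as spark-2.4.3-bin-hadoop2.7.tgz)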
# download prepackaged spark assembly
# Spark 2.4.3 needs special case for Scala 2.12
if [[ ${SCALAVER} == 2.12 ]];
then
    curl \
        -L "https://www.apache.org/dyn/mirrors/mirrors.cgi?action=download&filename=spark/${SPARK}/${SPARK}-bin-without-hadoop-scala-2.12.tgz" \
        -o ${SPARK}-bin-without-hadoop-scala-2.12.tgz
    tar xzvf ${SPARK}-bin-without-hadoop-scala-2.12.tgz
    export SPARK_HOME=${ADAM_TMP_DIR}/${SPARK}-bin-without-hadoop-scala-2.12
    curl \
        -L "http://www-us.apache.org/dist/hadoop/common/hadoop-2.7.7/hadoop-2.7.7.tar.gz" \
        -o hadoop-2.7.7.tar.gz
    tar xzvf hadoop-2.7.7.tar.gz
    # remove references to avro 1.7.x
    find hadoop-2.7.7 -name "*.jar" | grep avro | xargs rm
    # download avro 1.8.x
    curl \
        -L "http://repo1.maven.org/maven2/org/apache/avro/avro/1.8.2/avro-1.8.2.jar" \
        -o hadoop-2.7.7/share/hadoop/common/avro-1.8.2.jar
    export SPARK_DIST_CLASSPATH=$(hadoop-2.7.7/bin/hadoop classpath)
else
    curl \
        -L "https://www.apache.org/dyn/mirrors/mirrors.cgi?action=download&filename=spark/${SPARK}/${SPARK}-bin-${HADOOP}.tgz" \
        -o ${SPARK}-bin-${HADOOP}.tgz
    tar xzvf ${SPARK}-bin-${HADOOP}.tgz
    export SPARK_HOME=${ADAM_TMP_DIR}/${SPARK}-bin-${HADOOP}
fi
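# (the "without hadoop" Spark build ships no Hadoop jars of its own;
# SPARK_DIST_CLASSPATH points it at the hadoop-2.7.7 distribution we just
# unpacked, and swapping avro 1.8.2 in for Hadoop's avro 1.7.x jars is
# presumably so the older avro does not shadow the version ADAM depends on)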
# set the path to the adam submit script
ADAM=./bin/adam-submit
# test running adam-shell
./bin/adam-shell -i scripts/jenkins-test-adam-shell.scala
# add pyspark to the python path
PY4J_ZIP="$(ls -1 "${SPARK_HOME}/python/lib" | grep py4j)"
export PYTHONPATH=${SPARK_HOME}/python:${SPARK_HOME}/python/lib/${PY4J_ZIP}:${PYTHONPATH}
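# (the py4j zip is located by globbing because its version string changes
# between Spark releases)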
# put adam jar on the pyspark path
ASSEMBLY_DIR="${ADAM_TMP_DIR}/adam-assembly/target"
ASSEMBLY_JAR="$(ls -1 "$ASSEMBLY_DIR" | grep "^adam[0-9A-Za-z\_\.-]*\.jar$" | grep -v javadoc | grep -v sources || true)"
export PYSPARK_SUBMIT_ARGS="--conf spark.driver.memory=4g --conf spark.executor.memory=4g --jars ${ASSEMBLY_DIR}/${ASSEMBLY_JAR} --driver-class-path ${ASSEMBLY_DIR}/${ASSEMBLY_JAR} pyspark-shell"
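# (pyspark reads PYSPARK_SUBMIT_ARGS when it launches its JVM, so the assembly
# jar ends up on both the driver and executor classpaths)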
# create a fresh conda environment for each python build
pythons=( 2.7 3.6 )
for python in "${pythons[@]}"
do
    uuid=$(uuidgen)
    conda create -y -q -n adam-build-${uuid} python=${python}
    source activate adam-build-${uuid}
    # prepare adam python
    pushd adam-python
    make prepare
    popd
    # make a directory to install SparkR into, and set the R user libs path
    export R_LIBS_USER=${SPARK_HOME}/local_R_libs
    mkdir -p ${R_LIBS_USER}
    R CMD INSTALL \
        -l ${R_LIBS_USER} \
        ${SPARK_HOME}/R/lib/SparkR/
    export SPARKR_SUBMIT_ARGS="--jars ${ASSEMBLY_DIR}/${ASSEMBLY_JAR} --driver-class-path ${ASSEMBLY_DIR}/${ASSEMBLY_JAR} sparkr-shell"
    # we can run the python build, now that we have a spark executable
    # -DskipTests appears to skip the python/R tests as well, so we skip the
    # Scala suites via -Dsuites instead
    mvn -U \
        -P python,r,distribution \
        package \
        -Dsuites=select.no.suites\* \
        -Dhadoop.version=${HADOOP_VERSION}
    # make sure that the distribution package contains an egg
    # if no egg is found, this will exit with code 1 and fail the build
    tar tzvf adam-distribution/target/adam-distribution*-bin.tar.gz | \
        grep bdgenomics.adam | \
        grep egg
    # run pyadam test
    ./bin/pyadam < scripts/jenkins-test-pyadam.py
    # deactivate and remove the conda env
    source deactivate
    conda remove -y -n adam-build-${uuid} --all
    # copy python targets back
    cp -r adam-python/target ${PROJECT_ROOT}/adam-python/
    # clean after each python version
    pushd adam-python
    make clean
    make clean_sdist
    popd
done
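# (-Dsuites=select.no.suites\* appears to match no ScalaTest suites, so the
# loop above rebuilds the python/R artifacts without rerunning the Scala tests;
# `source activate`/`source deactivate` is the pre-4.4 conda CLI syntax)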
# define filenames
BAM=mouse_chrM.bam
READS=${BAM}.reads.adam
SORTED_READS=${BAM}.reads.sorted.adam
FRAGMENTS=${BAM}.fragments.adam
# fetch our input dataset
echo "Fetching BAM file"
rm -rf ${BAM}
wget -q https://s3.amazonaws.com/bdgenomics-test/${BAM}
# once fetched, convert BAM to ADAM
echo "Converting BAM to ADAM read format"
rm -rf ${READS}
${ADAM} transformAlignments ${BAM} ${READS}
# then, sort the BAM
echo "Converting BAM to ADAM read format with sorting"
rm -rf ${SORTED_READS}
${ADAM} transformAlignments -sort_by_reference_position ${READS} ${SORTED_READS}
# convert the reads to fragments to re-pair the reads
echo "Converting read file to fragments"
rm -rf ${FRAGMENTS}
${ADAM} transformFragments -load_as_alignments ${READS} ${FRAGMENTS}
# test that printing works
echo "Printing reads and fragments"
${ADAM} print ${READS} 1>/dev/null 2>/dev/null
${ADAM} print ${FRAGMENTS} 1>/dev/null 2>/dev/null
# run flagstat to verify that flagstat runs OK
echo "Printing read statistics"
${ADAM} flagstat -print_metrics ${READS}
rm -rf ${ADAM_TMP_DIR}
popd
pushd ${PROJECT_ROOT}
# move back to Scala 2.11 as default
if [ ${SCALAVER} == 2.12 ];
then
set +e
./scripts/move_to_scala_2.11.sh
set -e
fi
# test that the source is formatted correctly
./scripts/format-source
if test -n "$(git status --porcelain)"
then
    echo "Please run './scripts/format-source'"
    exit 1
fi
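# (git status --porcelain prints a line per modified file; any output here
# means format-source changed something, i.e. the source was not formatted)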
popd
echo
echo "All the tests passed"
echo