Skip to content

Commit

Permalink
Merge 6c205c7 into 65dde41
Browse files Browse the repository at this point in the history
  • Loading branch information
fnothaft committed Oct 20, 2017
2 parents 65dde41 + 6c205c7 commit 6936037
Show file tree
Hide file tree
Showing 15 changed files with 306 additions and 144 deletions.
Expand Up @@ -169,7 +169,7 @@ class VariantContextRDDSuite extends ADAMFunSuite {
// -Inf INFO value --> -Infinity after conversion
assert(variant.getAnnotation.getAttributes.get("BaseQRankSum") === "-Infinity")

val genotype = vcs.toGenotypes().rdd.filter(_.getVariant == variant).first()
val genotype = vcs.toGenotypes().rdd.filter(_.getStart == 14396L).first()
assert(genotype.getVariantCallingAnnotations.getRmsMapQ === Float.NegativeInfinity)
// +Inf FORMAT value --> Infinity after conversion
assert(genotype.getVariantCallingAnnotations.getAttributes.get("float") === "Infinity")
Expand Down
9 changes: 9 additions & 0 deletions adam-distribution/src/main/assembly/assembly.xml
Expand Up @@ -59,5 +59,14 @@
<directoryMode>0755</directoryMode>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>../adam-python/dist</directory>
<includes>
<include>bdgenomics.adam*.egg</include>
</includes>
<outputDirectory>repo</outputDirectory>
<directoryMode>0755</directoryMode>
<fileMode>0644</fileMode>
</fileSet>
</fileSets>
</assembly>
5 changes: 3 additions & 2 deletions adam-python/Makefile
Expand Up @@ -26,7 +26,7 @@ The 'prepare' target installs ADAM's build requirements into the current virtual

The 'develop' target creates an editable install of ADAM and its runtime requirements in the
current virtualenv. The install is called 'editable' because changes to the source code
immediately affect the virtualenv.
immediately affect the virtualenv. This target also builds a .egg.

The 'clean' target undoes the effect of 'develop'.

Expand Down Expand Up @@ -60,9 +60,10 @@ red=\033[0;31m

develop:
$(pip) install -e .$(extras)
$(python) setup.py bdist_egg
clean_develop:
- $(pip) uninstall -y adam
- rm -rf bdgenomics/*.egg-info
- rm -rf bdgenomics/*.egg*

sdist: dist/$(sdist_name)
dist/$(sdist_name):
Expand Down
52 changes: 6 additions & 46 deletions bin/adam-shell
Expand Up @@ -21,56 +21,16 @@ set -e

# does the user have ADAM_OPTS set? if yes, then warn
if [[ -z $@ && -n "$ADAM_OPTS" ]]; then
echo "WARNING: Passing Spark arguments via ADAM_OPTS was recently removed."
echo "Run adam-shell instead as adam-shell <spark-args>"
echo "WARNING: Passing Spark arguments via ADAM_OPTS was recently removed." 1>&2
echo "Run adam-shell instead as adam-shell <spark-args>" 1>&2
fi

# Find original directory of this script, resolving symlinks
# http://stackoverflow.com/questions/59895/can-a-bash-script-tell-what-directory-its-stored-in/246128#246128
SOURCE="${BASH_SOURCE[0]}"
while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink
SCRIPT_DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
SOURCE="$(readlink "$SOURCE")"
[[ $SOURCE != /* ]] && SOURCE="$SCRIPT_DIR/$SOURCE" # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located
done
SCRIPT_DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
INSTALL_DIR="$SCRIPT_DIR/.."
SOURCE_DIR=$(dirname ${BASH_SOURCE[0]})

# Find ADAM cli assembly jar
ADAM_CLI_JAR=
if [ -d "$INSTALL_DIR/repo" ]; then
ASSEMBLY_DIR="$INSTALL_DIR/repo"
else
ASSEMBLY_DIR="$INSTALL_DIR/adam-assembly/target"
fi

num_jars="$(ls -1 "$ASSEMBLY_DIR" | grep "^adam[0-9A-Za-z\.\_\-]*\.jar$" | grep -v javadoc | grep -v sources | wc -l)"
if [ "$num_jars" -eq "0" ]; then
echo "Failed to find ADAM assembly in $ASSEMBLY_DIR." 1>&2
echo "You need to build ADAM before running this program." 1>&2
exit 1
fi

ASSEMBLY_JARS="$(ls -1 "$ASSEMBLY_DIR" | grep "^adam[0-9A-Za-z\.\_\-]*\.jar$" | grep -v javadoc | grep -v sources || true)"
if [ "$num_jars" -gt "1" ]; then
echo "Found multiple ADAM cli assembly jars in $ASSEMBLY_DIR:" 1>&2
echo "$ASSEMBLY_JARS" 1>&2
echo "Please remove all but one jar." 1>&2
exit 1
fi

ADAM_CLI_JAR="${ASSEMBLY_DIR}/${ASSEMBLY_JARS}"
ADAM_CLI_JAR=$(${SOURCE_DIR}/find-adam-assembly.sh)

if [ -z "$SPARK_HOME" ]; then
SPARK_SHELL=$(which spark-shell || echo)
else
SPARK_SHELL="$SPARK_HOME"/bin/spark-shell
fi
if [ -z "$SPARK_SHELL" ]; then
echo "SPARK_HOME not set and spark-shell not on PATH; Aborting."
exit 1
fi
echo "Using SPARK_SHELL=$SPARK_SHELL"
SPARK_SHELL=$(${SOURCE_DIR}/find-spark.sh spark-shell)
echo "Using SPARK_SHELL=$SPARK_SHELL" 1>&2

# submit the job to Spark
"$SPARK_SHELL" \
Expand Down
57 changes: 8 additions & 49 deletions bin/adam-submit
Expand Up @@ -21,6 +21,8 @@

set -e

SOURCE_DIR=$(dirname ${BASH_SOURCE[0]})

# Split args into Spark and ADAM args
DD=False # DD is "double dash"
PRE_DD=()
Expand All @@ -45,63 +47,20 @@ fi

# does the user have ADAM_OPTS set? if yes, then warn
if [[ $DD == False && -n "$ADAM_OPTS" ]]; then
echo "WARNING: Passing Spark arguments via ADAM_OPTS was recently removed."
echo "Run adam-submit instead as adam-submit <spark-args> -- <adam-args>"
echo "WARNING: Passing Spark arguments via ADAM_OPTS was recently removed." 1>&2
echo "Run adam-submit instead as adam-submit <spark-args> -- <adam-args>" 1>&2
fi

# Find original directory of this script, resolving symlinks
# http://stackoverflow.com/questions/59895/can-a-bash-script-tell-what-directory-its-stored-in/246128#246128
SOURCE="${BASH_SOURCE[0]}"
while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink
SCRIPT_DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
SOURCE="$(readlink "$SOURCE")"
[[ $SOURCE != /* ]] && SOURCE="$SCRIPT_DIR/$SOURCE" # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located
done
SCRIPT_DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
INSTALL_DIR="$SCRIPT_DIR/.."

# Find ADAM cli assembly jar
ADAM_CLI_JAR=
if [ -d "$INSTALL_DIR/repo" ]; then
ASSEMBLY_DIR="$INSTALL_DIR/repo"
else
ASSEMBLY_DIR="$INSTALL_DIR/adam-assembly/target"
fi

num_jars="$(ls -1 "$ASSEMBLY_DIR" | grep "^adam[0-9A-Za-z\.\_\-]*\.jar$" | grep -v javadoc | grep -v sources | wc -l)"
if [ "$num_jars" -eq "0" ]; then
echo "Failed to find ADAM cli assembly in $ASSEMBLY_DIR." 1>&2
echo "You need to build ADAM before running this program." 1>&2
exit 1
fi

ASSEMBLY_JARS="$(ls -1 "$ASSEMBLY_DIR" | grep "^adam[0-9A-Za-z\.\_\-]*\.jar$" | grep -v javadoc | grep -v sources || true)"
if [ "$num_jars" -gt "1" ]; then
echo "Found multiple ADAM cli assembly jars in $ASSEMBLY_DIR:" 1>&2
echo "$ASSEMBLY_JARS" 1>&2
echo "Please remove all but one jar." 1>&2
exit 1
fi

ADAM_CLI_JAR="${ASSEMBLY_DIR}/${ASSEMBLY_JARS}"
ADAM_CLI_JAR=$(${SOURCE_DIR}/find-adam-assembly.sh)

# Allow main class to be overridden
if [ -z "$ADAM_MAIN" ]; then
ADAM_MAIN="org.bdgenomics.adam.cli.ADAMMain"
fi
echo "Using ADAM_MAIN=$ADAM_MAIN"
echo "Using ADAM_MAIN=$ADAM_MAIN" 1>&2

# Find spark-submit script
if [ -z "$SPARK_HOME" ]; then
SPARK_SUBMIT=$(which spark-submit || echo)
else
SPARK_SUBMIT="$SPARK_HOME"/bin/spark-submit
fi
if [ -z "$SPARK_SUBMIT" ]; then
echo "SPARK_HOME not set and spark-submit not on PATH; Aborting."
exit 1
fi
echo "Using SPARK_SUBMIT=$SPARK_SUBMIT"
SPARK_SUBMIT=$(${SOURCE_DIR}/find-spark.sh)
echo "Using spark-submit=$SPARK_SUBMIT" 1>&2

# submit the job to Spark
"$SPARK_SUBMIT" \
Expand Down
36 changes: 36 additions & 0 deletions bin/adamR
@@ -0,0 +1,36 @@
#!/usr/bin/env bash
#
# Licensed to Big Data Genomics (BDG) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The BDG licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -e

# Launch sparkR with the ADAM assembly jar on the classpath and Kryo
# serialization configured. All command-line arguments are forwarded to
# sparkR unchanged.

# Directory containing this script; the helper scripts are expected to sit
# next to it. Quoted so that paths containing whitespace survive.
SOURCE_DIR=$(dirname "${BASH_SOURCE[0]}")

# Path to the ADAM assembly jar, as printed by the helper on stdout.
# Because of `set -e`, a failing helper aborts this script here.
ADAM_CLI_JAR=$("${SOURCE_DIR}/find-adam-assembly.sh")

# Path to the sparkR launcher, as printed by the helper on stdout.
SPARKR=$("${SOURCE_DIR}/find-spark.sh" sparkR)
# Diagnostics go to stderr so stdout stays clean for callers.
echo "Using SPARKR=$SPARKR" 1>&2

# submit the job to Spark
"$SPARKR" \
  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
  --conf spark.kryo.registrator=org.bdgenomics.adam.serialization.ADAMKryoRegistrator \
  --jars "${ADAM_CLI_JAR}" \
  --driver-class-path "${ADAM_CLI_JAR}" \
  "$@"

50 changes: 50 additions & 0 deletions bin/find-adam-assembly.sh
@@ -0,0 +1,50 @@
#!/usr/bin/env bash
#
# Licensed to Big Data Genomics (BDG) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The BDG licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -e

# Print the path of the single ADAM cli assembly jar on stdout.
#
# The jar is looked up in "$INSTALL_DIR/repo" when that directory exists,
# otherwise in "$INSTALL_DIR/adam-assembly/target", where INSTALL_DIR is the
# parent of the directory reported by find-script-dir.sh.
#
# Exits with status 1 and a message on stderr when no matching jar is found
# or when more than one matching jar is found.

SOURCE_DIR=$(dirname "${BASH_SOURCE[0]}")
SCRIPT_DIR=$("${SOURCE_DIR}/find-script-dir.sh")
INSTALL_DIR=$(dirname "$SCRIPT_DIR")

# Find ADAM cli assembly jar
if [ -d "$INSTALL_DIR/repo" ]; then
  ASSEMBLY_DIR="$INSTALL_DIR/repo"
else
  ASSEMBLY_DIR="$INSTALL_DIR/adam-assembly/target"
fi

# One candidate jar name per line, excluding javadoc and sources jars.
# `|| true` keeps `set -e` from aborting when grep matches nothing, so the
# friendlier error below can be printed instead.
ASSEMBLY_JARS=$(ls -1 "$ASSEMBLY_DIR" | grep "^adam[0-9A-Za-z\.\_\-]*\.jar$" | grep -v javadoc | grep -v sources || true)

# Count the candidates. An empty list must be handled explicitly: piping an
# empty string through echo/printf still produces one (empty) line, and an
# unquoted expansion would fold several names onto a single line.
if [ -z "$ASSEMBLY_JARS" ]; then
  num_jars=0
else
  num_jars=$(printf '%s\n' "$ASSEMBLY_JARS" | wc -l)
fi

if [ "$num_jars" -eq "0" ]; then
  echo "Failed to find ADAM cli assembly in $ASSEMBLY_DIR." 1>&2
  echo "You need to build ADAM before running this program." 1>&2
  exit 1
fi

if [ "$num_jars" -gt "1" ]; then
  echo "Found multiple ADAM cli assembly jars in $ASSEMBLY_DIR:" 1>&2
  echo "$ASSEMBLY_JARS" 1>&2
  echo "Please remove all but one jar." 1>&2
  exit 1
fi

echo "${ASSEMBLY_DIR}/${ASSEMBLY_JARS}"
49 changes: 49 additions & 0 deletions bin/find-adam-egg.sh
@@ -0,0 +1,49 @@
#!/usr/bin/env bash
#
# Licensed to Big Data Genomics (BDG) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The BDG licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -e

# Print the path of the single ADAM Python egg on stdout.
#
# The egg is looked up in "$INSTALL_DIR/repo" when that directory exists,
# otherwise in "$INSTALL_DIR/adam-python/dist", where INSTALL_DIR is the
# parent of the directory reported by find-script-dir.sh.
#
# Exits with status 1 and a message on stderr when no matching egg is found
# or when more than one matching egg is found.

SOURCE_DIR=$(dirname "${BASH_SOURCE[0]}")
SCRIPT_DIR=$("${SOURCE_DIR}/find-script-dir.sh")
INSTALL_DIR=$(dirname "$SCRIPT_DIR")

# Find ADAM python egg
if [ -d "$INSTALL_DIR/repo" ]; then
  DIST_DIR="$INSTALL_DIR/repo"
else
  DIST_DIR="$INSTALL_DIR/adam-python/dist"
fi

# One candidate egg name per line. The dot before "egg" is escaped so that
# only a literal ".egg" extension matches. `|| true` keeps `set -e` from
# aborting when grep matches nothing, so the friendlier error below can be
# printed instead.
DIST_EGG=$(ls -1 "$DIST_DIR" | grep "^bdgenomics\.adam[0-9A-Za-z\.\_\-]*\.egg$" || true)

# Count the candidates. An empty list must be handled explicitly: piping an
# empty string through echo/printf still produces one (empty) line, and an
# unquoted expansion would fold several names onto a single line.
if [ -z "$DIST_EGG" ]; then
  num_egg=0
else
  num_egg=$(printf '%s\n' "$DIST_EGG" | wc -l)
fi

if [ "$num_egg" -eq "0" ]; then
  echo "Failed to find ADAM egg in $DIST_DIR." 1>&2
  echo "You need to build ADAM before running this program." 1>&2
  exit 1
fi

if [ "$num_egg" -gt "1" ]; then
  echo "Found multiple ADAM eggs in $DIST_DIR:" 1>&2
  echo "$DIST_EGG" 1>&2
  echo "Please remove all but one egg." 1>&2
  exit 1
fi

echo "${DIST_DIR}/${DIST_EGG}"
32 changes: 32 additions & 0 deletions bin/find-script-dir.sh
@@ -0,0 +1,32 @@
#!/usr/bin/env bash
#
# Licensed to Big Data Genomics (BDG) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The BDG licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -e

# Print the physical directory that contains this script on stdout,
# following the script path through any chain of symlinks first.
# http://stackoverflow.com/questions/59895/can-a-bash-script-tell-what-directory-its-stored-in/246128#246128
SOURCE="${BASH_SOURCE[0]}"
while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink
  SCRIPT_DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
  SOURCE="$(readlink "$SOURCE")"
  # if $SOURCE was a relative symlink, we need to resolve it relative to the
  # path where the symlink file was located
  [[ $SOURCE != /* ]] && SOURCE="$SCRIPT_DIR/$SOURCE"
done

# Resolve into a variable first so that a failing `cd` aborts the script
# (via `set -e`) instead of silently printing an empty line, then print the
# result quoted so whitespace and glob characters in the path are preserved.
SCRIPT_DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
echo "$SCRIPT_DIR"

35 changes: 35 additions & 0 deletions bin/find-spark.sh
@@ -0,0 +1,35 @@
#!/usr/bin/env bash
#
# Licensed to Big Data Genomics (BDG) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The BDG licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -e

# Print the path of a Spark launcher script on stdout.
#
# Arguments:
#   $1  name of the launcher to locate; defaults to spark-submit.
#
# When SPARK_HOME is set and non-empty, the result is
# "$SPARK_HOME/bin/<launcher>" (no existence check is made). Otherwise the
# launcher is looked up on PATH. Exits with status 1 and a message on stderr
# when SPARK_HOME is unset and the launcher is not on PATH.
SPARK_CMD=${1:-spark-submit}

# Find the requested Spark launcher script
if [ -z "$SPARK_HOME" ]; then
  # `command -v` is the POSIX way to look a command up on PATH; `|| echo`
  # turns a failed lookup into an empty string so `set -e` does not abort
  # before the explanatory message below is printed.
  SPARK_SUBMIT=$(command -v "${SPARK_CMD}" || echo)
else
  SPARK_SUBMIT="${SPARK_HOME}/bin/${SPARK_CMD}"
fi
if [ -z "$SPARK_SUBMIT" ]; then
  echo "SPARK_HOME not set and ${SPARK_CMD} not on PATH; Aborting." 1>&2
  exit 1
fi

# Quoted so that whitespace in the path is emitted verbatim.
echo "${SPARK_SUBMIT}"

0 comments on commit 6936037

Please sign in to comment.