From 0158dfa7b69cef398e0fbe008db81c1f24ad94ac Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Wed, 14 Feb 2018 16:22:47 -0600 Subject: [PATCH 01/18] Adding container builder. --- core/pom.xml | 5 + .../cannoli/builder/CommandBuilder.java | 448 ++++++++++++++++++ .../cannoli/builder/ContainerBuilder.java | 148 ++++++ .../cannoli/builder/DockerBuilder.java | 80 ++++ .../cannoli/builder/LocalBuilder.java | 60 +++ .../cannoli/builder/SingularityBuilder.java | 82 ++++ .../cannoli/builder/package-info.java | 22 + .../cannoli/builder/DockerBuilderTest.java | 159 +++++++ .../cannoli/builder/LocalBuilderTest.java | 174 +++++++ .../builder/SingularityBuilderTest.java | 92 ++++ pom.xml | 13 +- 11 files changed, 1276 insertions(+), 7 deletions(-) create mode 100644 core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilder.java create mode 100644 core/src/main/java/org/bdgenomics/cannoli/builder/ContainerBuilder.java create mode 100644 core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java create mode 100644 core/src/main/java/org/bdgenomics/cannoli/builder/LocalBuilder.java create mode 100644 core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java create mode 100644 core/src/main/java/org/bdgenomics/cannoli/builder/package-info.java create mode 100644 core/src/test/java/org/bdgenomics/cannoli/builder/DockerBuilderTest.java create mode 100644 core/src/test/java/org/bdgenomics/cannoli/builder/LocalBuilderTest.java create mode 100644 core/src/test/java/org/bdgenomics/cannoli/builder/SingularityBuilderTest.java diff --git a/core/pom.xml b/core/pom.xml index 2acb76f9..1d771298 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -37,6 +37,11 @@ + + junit + junit + test + org.apache.spark spark-core_${scala.version.prefix} diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilder.java new file mode 100644 index 00000000..405cd309 --- /dev/null +++ 
b/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilder.java @@ -0,0 +1,448 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.cannoli.builder; + +import static com.google.common.base.Preconditions.checkNotNull; + +import java.io.Serializable; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import java.util.concurrent.TimeUnit; + +import javax.annotation.Nullable; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; + +/** + * Abstract command builder. + * + * @param <C> command builder type, returned by the fluent setters and adders + */ +abstract class CommandBuilder<C extends CommandBuilder<C>> implements Serializable { + /** Command to run. */ + private String executable; + + /** Name of container image. */ + private String image; + + /** Number of bases to flank each command invocation by. */ + private Integer flankSize; + + /** True to run via sudo. */ + private boolean sudo = false; + + /** How long to let a single partition run for, in seconds. */ + private Long timeout; + + /** List of command arguments. */ + private final List<String> arguments = new ArrayList<>(); + + /** Map of environment variables.
*/ + private final Map<String, String> environment = new HashMap<>(); + + /** List of files to make available locally. */ + private final List<String> files = new ArrayList<>(); + + /** Map of mount points, source to target. */ + private final Map<String, String> mounts = new HashMap<>(); + + + /** + * Create a new command builder. + */ + protected CommandBuilder() { + // empty + } + + /** + * Create a new command builder with the specified executable. + * + * @param executable executable, must not be null + */ + protected CommandBuilder(final String executable) { + this(); + setExecutable(executable); + } + + + /** + * Set the executable for this command builder. + * + * @param executable executable, must not be null + * @return this command builder + */ + public final C setExecutable(final String executable) { + checkNotNull(executable); + this.executable = executable; + return (C) this; + } + + /** + * Set the number of bases to flank each command invocation by for this builder. + * + * @param flankSize number of bases to flank each command invocation by, may be null + * @return this command builder + */ + public final C setFlankSize(@Nullable final Integer flankSize) { + this.flankSize = flankSize; + return (C) this; + } + + /** + * Set the image for this command builder. + * + * @param image image, must not be null + * @return this command builder + */ + public final C setImage(final String image) { + checkNotNull(image); + this.image = image; + return (C) this; + } + + /** + * Set to true to run via sudo for this command builder. + * + * @param sudo true to run via sudo + * @return this command builder + */ + public final C setSudo(final boolean sudo) { + this.sudo = sudo; + return (C) this; + } + + /** + * Set how long to let a single partition run for, in seconds, for this builder.
+ * + * @param timeout how long to let a single partition run for, in seconds, may be null + * @return this command builder + */ + public final C setTimeout(@Nullable final Long timeout) { + this.timeout = timeout; + return (C) this; + } + + /** + * Set how long to let a single partition run for, in the specified time unit, for this builder. + * + * @param duration duration, stored after conversion to seconds + * @param timeUnit time unit, must not be null + * @return this command builder + */ + public final C setTimeout(final long duration, final TimeUnit timeUnit) { + checkNotNull(timeUnit); + this.timeout = timeUnit.toSeconds(duration); + return (C) this; + } + + + /** + * Add one or more arguments to the list of command arguments for this command builder. + * + * @param arguments variable number of arguments to add, must not be null + * @return this command builder + */ + public final C add(final String... arguments) { + return addArguments(arguments); + } + + /** + * Add one or more arguments to the list of command arguments for this command builder. + * + * @param arguments arguments to add, must not be null + * @return this command builder + */ + public final C add(final Iterable<String> arguments) { + return addArguments(arguments); + } + + /** + * Add an argument to the list of command arguments for this command builder. + * + * @param argument argument to add, must not be null + * @return this command builder + */ + public final C addArgument(final String argument) { + checkNotNull(argument); + arguments.add(argument); + return (C) this; + } + + /** + * Add one or more arguments to the list of command arguments for this command builder. + * + * @param arguments variable number of arguments to add, must not be null or contain null + * @return this command builder + */ + public final C addArguments(final String...
arguments) { + checkNotNull(arguments); + for (String argument : arguments) { + addArgument(argument); + } + return (C) this; + } + + /** + * Add one or more arguments to the list of command arguments for this command builder. + * + * @param arguments arguments to add, must not be null or contain null + * @return this command builder + */ + public final C addArguments(final Iterable<String> arguments) { + checkNotNull(arguments); + for (String argument : arguments) { + addArgument(argument); + } + return (C) this; + } + + /** + * Add an environment variable to the map of environment variables for this command builder. + * + * @param variable environment variable to add, must not be null + * @param value environment variable value to add, must not be null + * @return this command builder + */ + public final C addEnvironment(final String variable, final String value) { + checkNotNull(variable); + checkNotNull(value); + environment.put(variable, value); + return (C) this; + } + + /** + * Add environment variables to the map of environment variables for this command builder. + * + * @param environment environment variables to add, must not be null or contain null + * @return this command builder + */ + public final C addEnvironment(final Map<String, String> environment) { + checkNotNull(environment); + environment.forEach(this::addEnvironment); + return (C) this; + } + + /** + * Add a file to the list of files to make available locally for this command builder. + * + * @param file file to add, must not be null + * @return this command builder + */ + public final C addFile(final String file) { + checkNotNull(file); + files.add(file); + return (C) this; + } + + /** + * Add zero or more files to the list of files to make available locally for this command builder. + * + * @param files variable number of files to add, must not be null or contain null + * @return this command builder + */ + public final C addFiles(final String...
files) { + checkNotNull(files); + for (String file : files) { + addFile(file); + } + return (C) this; + } + + /** + * Add files to the list of files to make available locally for this command builder. + * + * @param files files to add, must not be null or contain null + * @return this command builder + */ + public final C addFiles(final Iterable<String> files) { + checkNotNull(files); + for (String file : files) { + addFile(file); + } + return (C) this; + } + + /** + * Add the specified mount point to the map of mount points for this command builder. + * + * @param source mount point source, must not be null + * @param target mount point target, must not be null + * @return this command builder + */ + public final C addMount(final String source, final String target) { + checkNotNull(source); + checkNotNull(target); + mounts.put(source, target); + return (C) this; + } + + /** + * Add the specified mount points to the map of mount points for this command builder. + * @param mounts mount points to add, must not be null or contain null + * @return this command builder + */ + public final C addMounts(final Map<String, String> mounts) { + checkNotNull(mounts); + mounts.forEach(this::addMount); + return (C) this; + } + + + /** + * Return the executable for this command builder. + * + * @return the executable for this command builder + */ + public final String getExecutable() { + return executable; + } + + /** + * Return the number of bases to flank each command invocation by for this builder. May be null. + * + * @return the number of bases to flank each command invocation by for this builder + */ + public final Integer getFlankSize() { + return flankSize; + } + + /** + * Return the number of bases to flank each command invocation by for this builder, as an optional. + * + * @return the number of bases to flank each command invocation by for this builder, as an optional + */ + public final Optional<Integer> getOptFlankSize() { + return Optional.ofNullable(flankSize); + } + + /** + * Return the image for this command builder.
+ * + * @return the image for this command builder. + */ + public final String getImage() { + return image; + } + + /** + * Return how long to let a single partition run for, in seconds, for this builder. May be null. + * + * @return how long to let a single partition run for, in seconds, for this builder + */ + public final Long getTimeout() { + return timeout; + } + + /** + * Return how long to let a single partition run for, in seconds, for this builder, as an optional. + * + * @return how long to let a single partition run for, in seconds, for this builder, as an optional + */ + public final Optional<Long> getOptTimeout() { + return Optional.ofNullable(timeout); + } + + /** + * Return true to run via sudo for this command builder. + * + * @return true to run via sudo for this command builder + */ + public final boolean getSudo() { + return isSudo(); + } + + /** + * Return true to run via sudo for this command builder. + * + * @return true to run via sudo for this command builder + */ + public final boolean isSudo() { + return sudo; + } + + /** + * Return an immutable list of command arguments for this command builder. + * + * @return an immutable list of command arguments for this command builder + */ + public final List<String> getArguments() { + return ImmutableList.copyOf(arguments); + } + + /** + * Return an immutable map of environment variables for this command builder. + * + * @return an immutable map of environment variables for this command builder + */ + public final Map<String, String> getEnvironment() { + return ImmutableMap.copyOf(environment); + } + + /** + * Return an immutable list of files to make available locally for this command builder. + * + * @return an immutable list of files to make available locally for this command builder + */ + public final List<String> getFiles() { + return ImmutableList.copyOf(files); + } + + /** + * Return an immutable map of mount points for this command builder.
+ * + * @return an immutable map of mount points for this command builder + */ + public final Map<String, String> getMounts() { + return ImmutableMap.copyOf(mounts); + } + + /** + * Reset this command builder. + * + * @return this command builder + */ + public final C reset() { + executable = null; + flankSize = null; + image = null; + sudo = false; + timeout = null; + + arguments.clear(); + environment.clear(); + files.clear(); + mounts.clear(); + + return (C) this; + } + + /** + * Build and return the command for this command builder as a list of strings. + * + * @return the command for this command builder as a list of strings. + * @throws IllegalStateException if this builder is in an illegal state, e.g. + * if required values are not set + */ + public abstract List<String> build(); +} diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/ContainerBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/ContainerBuilder.java new file mode 100644 index 00000000..29ee8b61 --- /dev/null +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/ContainerBuilder.java @@ -0,0 +1,148 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.bdgenomics.cannoli.builder; + +import static com.google.common.base.Preconditions.checkNotNull; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; + +/** + * Abstract container builder. + * + * @param <C> container builder type, returned by the fluent setters and adders + */ +abstract class ContainerBuilder<C extends ContainerBuilder<C>> extends CommandBuilder<C> { + + /** + * Create a new container builder. + */ + protected ContainerBuilder() { + super(); + } + + /** + * Create a new container builder with the specified executable. + * + * @param executable executable, must not be null + */ + protected ContainerBuilder(final String executable) { + super(executable); + } + + /** + * Create a new container builder with the specified executable and image. + * + * @param executable executable, must not be null + * @param image image, must not be null + */ + protected ContainerBuilder(final String executable, final String image) { + super(executable); + setImage(image); + } + + + /** + * Return the container commands for this container builder. + * + * @return the container commands for this container builder + */ + protected abstract List<String> getContainerCommands(); + + /** + * Return the remove argument for this container builder. + * + * @return the remove argument for this container builder + */ + protected abstract List<String> getRemoveArgument(); + + /** + * Format the specified environment variable into a list of string arguments. + * + * @param variable variable + * @param value value + * @return the specified environment variable formatted into a list of string arguments. + */ + protected abstract List<String> formatEnvironment(String variable, String value); + + /** + * Format the specified image into a list of string arguments. + * + * @param image image + * @return the specified image formatted into a list of string arguments.
+ */ + protected abstract List<String> formatImage(String image); + + /** + * Format the specified mount point into a list of string arguments. + * + * @param source source + * @param target target + * @return the specified mount point formatted into a list of string arguments + */ + protected abstract List<String> formatMount(String source, String target); + + @Override + public final List<String> build() { + if (getExecutable() == null) { + throw new IllegalStateException("executable must not be null"); + } + if (getImage() == null) { + throw new IllegalStateException("image must not be null"); + } + + List<String> command = new ArrayList<>(); + + // add sudo if necessary + if (isSudo()) { + command.add("sudo"); + } + + // e.g. docker run, etc. + command.addAll(getContainerCommands()); + + // add environment arguments + for (Map.Entry<String, String> e : getEnvironment().entrySet()) { + command.addAll(formatEnvironment(e.getKey(), e.getValue())); + } + + // add mount arguments + for (Map.Entry<String, String> e : getMounts().entrySet()) { + command.addAll(formatMount(e.getKey(), e.getValue())); + } + + // e.g. --rm + command.addAll(getRemoveArgument()); + + // container image name + command.addAll(formatImage(getImage())); + + // add command + command.add(getExecutable()); + + // add command arguments + command.addAll(getArguments()); + + return command; + } +} diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java new file mode 100644 index 00000000..f396374f --- /dev/null +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java @@ -0,0 +1,80 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.cannoli.builder; + +import java.util.List; + +import com.google.common.collect.ImmutableList; + +/** + * Docker container builder; formats the arguments of a <code>docker run --rm</code> command. + */ +public final class DockerBuilder extends ContainerBuilder<DockerBuilder> { + + /** + * Create a new Docker command builder. + */ + public DockerBuilder() { + super(); + } + + /** + * Create a new Docker command builder with the specified executable. + * + * @param executable executable, must not be null + */ + public DockerBuilder(final String executable) { + super(executable); + } + + /** + * Create a new Docker command builder with the specified executable and image.
+ * + * @param executable executable, must not be null + * @param image image, must not be null + */ + public DockerBuilder(final String executable, final String image) { + super(executable, image); + } + + + @Override + protected List<String> getContainerCommands() { + return ImmutableList.of("docker", "run"); + } + + @Override + protected List<String> getRemoveArgument() { + return ImmutableList.of("--rm"); + } + + @Override + protected List<String> formatImage(final String image) { + return ImmutableList.of(image); + } + + @Override + protected List<String> formatEnvironment(final String variable, final String value) { + return ImmutableList.of("--env", variable + "=" + value); + } + + @Override + protected List<String> formatMount(final String source, final String target) { + return ImmutableList.of("--mount", "type=bind,source=" + source + ",target=" + target); + } +} diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/LocalBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/LocalBuilder.java new file mode 100644 index 00000000..7ab6e7fa --- /dev/null +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/LocalBuilder.java @@ -0,0 +1,60 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.bdgenomics.cannoli.builder; + +import java.util.ArrayList; +import java.util.List; + +/** + * Local command builder; the built command is the executable followed by its arguments. + */ +public final class LocalBuilder extends CommandBuilder<LocalBuilder> { + + /** + * Create a new local command builder. + */ + public LocalBuilder() { + super(); + } + + /** + * Create a new local command builder with the specified executable. + * + * @param executable executable, must not be null + */ + public LocalBuilder(final String executable) { + super(executable); + } + + + @Override + public final List<String> build() { + if (getExecutable() == null) { + throw new IllegalStateException("executable must not be null"); + } + List<String> command = new ArrayList<>(); + + // add command + command.add(getExecutable()); + + // add command arguments + command.addAll(getArguments()); + + return command; + } +} diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java new file mode 100644 index 00000000..3e20a636 --- /dev/null +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java @@ -0,0 +1,82 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.bdgenomics.cannoli.builder; + +import java.util.List; + +import com.google.common.collect.ImmutableList; + +/** + * Singularity container builder; formats the arguments of a <code>singularity exec</code> command. + */ +public final class SingularityBuilder extends ContainerBuilder<SingularityBuilder> { + + /** + * Create a new Singularity command builder. + */ + public SingularityBuilder() { + super(); + } + + /** + * Create a new Singularity command builder with the specified executable. + * + * @param executable executable, must not be null + */ + public SingularityBuilder(final String executable) { + super(executable); + } + + /** + * Create a new Singularity command builder with the specified executable and image. + * + * @param executable executable, must not be null + * @param image image, must not be null + */ + public SingularityBuilder(final String executable, final String image) { + super(executable); + setImage(image); + } + + + @Override + protected List<String> getContainerCommands() { + return ImmutableList.of("singularity", "exec"); // todo: -q ? + } + + @Override + protected List<String> getRemoveArgument() { + return ImmutableList.of(); // singularity exec has no counterpart to docker's --rm, so add nothing + } + + @Override + protected List<String> formatEnvironment(final String variable, final String value) { + return ImmutableList.of("--env", "SINGULARITYENV_" + variable + "=" + value); // NOTE(review): --env looks version dependent and may not want the SINGULARITYENV_ prefix; confirm + } + + @Override + protected List<String> formatImage(final String image) { + // todo: add docker:// if necessary + return ImmutableList.of(image); + } + + @Override + protected List<String> formatMount(final String source, final String target) { + return ImmutableList.of("-B", source + ":" + target); // todo: --long-arg for -B ?
/mnt issue + } +} diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/package-info.java b/core/src/main/java/org/bdgenomics/cannoli/builder/package-info.java new file mode 100644 index 00000000..178fdc13 --- /dev/null +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/package-info.java @@ -0,0 +1,22 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Cannoli pipe command builder. + */ +package org.bdgenomics.cannoli.builder; diff --git a/core/src/test/java/org/bdgenomics/cannoli/builder/DockerBuilderTest.java b/core/src/test/java/org/bdgenomics/cannoli/builder/DockerBuilderTest.java new file mode 100644 index 00000000..16689aee --- /dev/null +++ b/core/src/test/java/org/bdgenomics/cannoli/builder/DockerBuilderTest.java @@ -0,0 +1,159 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.cannoli.builder; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.util.List; + +import org.junit.Before; +import org.junit.Test; + +/** + * Unit test for DockerBuilder. + */ +public final class DockerBuilderTest { + private DockerBuilder builder; + + @Before + public void setUp() { + builder = new DockerBuilder(); + } + + @Test + public void testCtr() { + assertNotNull(builder); + } + + @Test(expected=NullPointerException.class) + public void testSetImageNull() { + builder.setImage(null); + } + + @Test(expected=NullPointerException.class) + public void testAddMountNullSource() { + builder.addMount(null, "/target"); + } + + @Test(expected=NullPointerException.class) + public void testAddMountNullTarget() { + builder.addMount("/source", null); + } + + @Test(expected=NullPointerException.class) + public void testAddMountsNull() { + builder.addMounts(null); + } + + @Test + public void testResetEmpty() { + builder.reset(); + assertNull(builder.getExecutable()); + assertNull(builder.getTimeout()); + assertNull(builder.getFlankSize()); + assertTrue(builder.getEnvironment().isEmpty()); + assertTrue(builder.getFiles().isEmpty()); + assertTrue(builder.getArguments().isEmpty()); + assertFalse(builder.getSudo()); + assertNull(builder.getImage()); + assertTrue(builder.getMounts().isEmpty()); + } + + @Test + public void testResetFull() { + builder +
.setExecutable("foo") + .setTimeout(1000L) + .setFlankSize(100) + .addEnvironment("VARIABLE", "value") + .addFile("file") + .addArgument("--help") + .setSudo(true) + .setImage("image") + .addMount("/source", "/target"); + + builder.reset(); + assertNull(builder.getExecutable()); + assertNull(builder.getTimeout()); + assertNull(builder.getFlankSize()); + assertTrue(builder.getEnvironment().isEmpty()); + assertTrue(builder.getFiles().isEmpty()); + assertTrue(builder.getArguments().isEmpty()); + assertFalse(builder.getSudo()); + assertNull(builder.getImage()); + assertTrue(builder.getMounts().isEmpty()); + } + + @Test(expected=IllegalStateException.class) + public void testBuildNullExecutable() { + builder.build(); + } + + @Test(expected=IllegalStateException.class) + public void testBuildNullImage() { + builder.setExecutable("foo").build(); + } + + @Test + public void testBuild() { + builder + .setExecutable("foo") + .setTimeout(1000L) + .setFlankSize(100) + .addEnvironment("VARIABLE", "value") + .addFile("file") + .addArgument("--help") + .setSudo(true) + .setImage("image") + .addMount("/source", "/target"); + + assertEquals("foo", builder.getExecutable()); + assertEquals(Long.valueOf(1000L), builder.getTimeout()); + assertEquals(Long.valueOf(1000L), builder.getOptTimeout().get()); + assertEquals(Integer.valueOf(100), builder.getFlankSize()); + assertEquals(Integer.valueOf(100), builder.getOptFlankSize().get()); + assertEquals(1, builder.getEnvironment().size()); + assertEquals("value", builder.getEnvironment().get("VARIABLE")); + assertEquals(1, builder.getFiles().size()); + assertEquals("file", builder.getFiles().get(0)); + assertEquals(1, builder.getArguments().size()); + assertEquals("--help", builder.getArguments().get(0)); + assertTrue(builder.getSudo()); + assertEquals("image", builder.getImage()); + assertEquals(1, builder.getMounts().size()); + assertEquals("/target", builder.getMounts().get("/source")); + + List<String> command = builder.build(); +
assertEquals(11, command.size()); + assertEquals("sudo", command.get(0)); + assertEquals("docker", command.get(1)); + assertEquals("run", command.get(2)); + assertEquals("--env", command.get(3)); + assertEquals("VARIABLE=value", command.get(4)); + assertEquals("--mount", command.get(5)); + assertEquals("type=bind,source=/source,target=/target", command.get(6)); + assertEquals("--rm", command.get(7)); + assertEquals("image", command.get(8)); + assertEquals("foo", command.get(9)); + assertEquals("--help", command.get(10)); + } +} diff --git a/core/src/test/java/org/bdgenomics/cannoli/builder/LocalBuilderTest.java b/core/src/test/java/org/bdgenomics/cannoli/builder/LocalBuilderTest.java new file mode 100644 index 00000000..3f30fd6b --- /dev/null +++ b/core/src/test/java/org/bdgenomics/cannoli/builder/LocalBuilderTest.java @@ -0,0 +1,174 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.bdgenomics.cannoli.builder; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.util.List; +import java.util.Map; + +import org.junit.Before; +import org.junit.Test; + +/** + * Unit test for LocalBuilder. + */ +public final class LocalBuilderTest { + private LocalBuilder builder; + + @Before + public void setUp() { + builder = new LocalBuilder(); + } + + @Test + public void testCtr() { + assertNotNull(builder); + } + + @Test(expected=NullPointerException.class) + public void testAddNullVarargArguments() { + builder.add((String[]) null); + } + + @Test(expected=NullPointerException.class) + public void testAddNullIterableArguments() { + builder.add((Iterable) null); + } + + @Test(expected=NullPointerException.class) + public void testAddArgumentNull() { + builder.addArgument(null); + } + + @Test(expected=NullPointerException.class) + public void testAddArgumentsNullVarargArguments() { + builder.addArguments((String[]) null); + } + + @Test(expected=NullPointerException.class) + public void testAddArgumentsNullIterableArguments() { + builder.addArguments((Iterable) null); + } + + @Test(expected=NullPointerException.class) + public void testAddEnvironmentNullVariable() { + builder.addEnvironment(null, "value"); + } + + @Test(expected=NullPointerException.class) + public void testAddEnvironmentNullValue() { + builder.addEnvironment("VARIABLE", null); + } + + @Test(expected=NullPointerException.class) + public void testAddEnvironmentNullEnvironment() { + builder.addEnvironment((Map) null); + } + + @Test(expected=NullPointerException.class) + public void testAddFileNull() { + builder.addFile(null); + } + + @Test(expected=NullPointerException.class) + public void testAddFilesNullVarargFiles() { + builder.addFiles((String[]) null); + } + + @Test(expected=NullPointerException.class) + public void 
testAddFilesNullIterableFiles() { + builder.addFiles((Iterable) null); + } + + @Test(expected=NullPointerException.class) + public void testSetExecutableNull() { + builder.setExecutable(null); + } + + @Test(expected=NullPointerException.class) + public void testSetTimeoutNullTimeUnit() { + builder.setTimeout(1000L, null); + } + + @Test + public void testResetEmpty() { + builder.reset(); + assertNull(builder.getExecutable()); + assertNull(builder.getTimeout()); + assertNull(builder.getFlankSize()); + assertTrue(builder.getEnvironment().isEmpty()); + assertTrue(builder.getFiles().isEmpty()); + assertTrue(builder.getArguments().isEmpty()); + } + + @Test + public void testResetFull() { + builder + .setExecutable("foo") + .setTimeout(1000L) + .setFlankSize(100) + .addEnvironment("VARIABLE", "value") + .addFile("file") + .addArgument("--help"); + + builder.reset(); + assertNull(builder.getExecutable()); + assertNull(builder.getTimeout()); + assertNull(builder.getFlankSize()); + assertTrue(builder.getEnvironment().isEmpty()); + assertTrue(builder.getFiles().isEmpty()); + assertTrue(builder.getArguments().isEmpty()); + } + + @Test(expected=IllegalStateException.class) + public void testBuildNullExecutable() { + builder.build(); + } + + @Test + public void testBuild() { + builder + .setExecutable("foo") + .setTimeout(1000L) + .setFlankSize(100) + .addEnvironment("VARIABLE", "value") + .addFile("file") + .addArgument("--help"); + + assertEquals("foo", builder.getExecutable()); + assertEquals(Long.valueOf(1000L), builder.getTimeout()); + assertEquals(Long.valueOf(1000L), builder.getOptTimeout().get()); + assertEquals(Integer.valueOf(100), builder.getFlankSize()); + assertEquals(Integer.valueOf(100), builder.getOptFlankSize().get()); + assertEquals(1, builder.getEnvironment().size()); + assertEquals("value", builder.getEnvironment().get("VARIABLE")); + assertEquals(1, builder.getFiles().size()); + assertEquals("file", builder.getFiles().get(0)); + assertEquals(1, 
builder.getArguments().size()); + assertEquals("--help", builder.getArguments().get(0)); + + List command = builder.build(); + assertEquals(2, command.size()); + assertEquals("foo", command.get(0)); + assertEquals("--help", command.get(1)); + } +} diff --git a/core/src/test/java/org/bdgenomics/cannoli/builder/SingularityBuilderTest.java b/core/src/test/java/org/bdgenomics/cannoli/builder/SingularityBuilderTest.java new file mode 100644 index 00000000..59be5907 --- /dev/null +++ b/core/src/test/java/org/bdgenomics/cannoli/builder/SingularityBuilderTest.java @@ -0,0 +1,92 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.cannoli.builder; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.util.List; + +import org.junit.Before; +import org.junit.Test; + +/** + * Unit test for SingularityBuilder. 
+ */ +public final class SingularityBuilderTest { + private SingularityBuilder builder; + + @Before + public void setUp() { + builder = new SingularityBuilder(); + } + + @Test + public void testCtr() { + assertNotNull(builder); + } + + @Test + public void testBuild() { + builder + .setExecutable("foo") + .setTimeout(1000L) + .setFlankSize(100) + .addEnvironment("VARIABLE", "value") + .addFile("file") + .addArgument("--help") + .setSudo(true) + .setImage("image") + .addMount("/source", "/target"); + + assertEquals("foo", builder.getExecutable()); + assertEquals(Long.valueOf(1000L), builder.getTimeout()); + assertEquals(Long.valueOf(1000L), builder.getOptTimeout().get()); + assertEquals(Integer.valueOf(100), builder.getFlankSize()); + assertEquals(Integer.valueOf(100), builder.getOptFlankSize().get()); + assertEquals(1, builder.getEnvironment().size()); + assertEquals("value", builder.getEnvironment().get("VARIABLE")); + assertEquals(1, builder.getFiles().size()); + assertEquals("file", builder.getFiles().get(0)); + assertEquals(1, builder.getArguments().size()); + assertEquals("--help", builder.getArguments().get(0)); + assertTrue(builder.getSudo()); + assertEquals("image", builder.getImage()); + assertEquals(1, builder.getMounts().size()); + assertEquals("/target", builder.getMounts().get("/source")); + + List command = builder.build(); + /* + assertEquals(11, command.size()); + assertEquals("sudo", command.get(0)); + assertEquals("singularity", command.get(1)); + assertEquals("exec", command.get(2)); + assertEquals("--env", command.get(3)); + assertEquals("VARIABLE=value", command.get(4)); + assertEquals("-B", command.get(5)); + assertEquals("type=bind,source=/source,target=/target", command.get(6)); + assertEquals("--rm", command.get(7)); + assertEquals("docker://image", command.get(8)); + assertEquals("foo", command.get(9)); + assertEquals("--help", command.get(10)); + */ + } +} diff --git a/pom.xml b/pom.xml index 3fbfee67..6ae9623d 100644 --- a/pom.xml +++ 
b/pom.xml @@ -28,6 +28,7 @@ 1.8 1.8 2.7.3 + 4.12 1.1.1 0.2.13 @@ -289,13 +290,6 @@ - - org.apache.maven.plugins - maven-surefire-plugin - - true - - org.scalariform scalariform-maven-plugin @@ -348,6 +342,11 @@ + + junit + junit + ${junit.version} + org.apache.hadoop hadoop-client From 7f5a67fbaf9ef7ca2a2762c0d096f43e228c1ac8 Mon Sep 17 00:00:00 2001 From: Michael Heuer Date: Mon, 19 Feb 2018 15:03:02 -0600 Subject: [PATCH 02/18] scalac, yer killing me --- .../org/bdgenomics/cannoli/cli/Bedtools.scala | 57 ++++++++++--------- .../cannoli/builder/CommandBuilders.java | 49 ++++++++++++++++ .../cannoli/builder/CommandBuildersTest.java | 49 ++++++++++++++++ 3 files changed, 128 insertions(+), 27 deletions(-) create mode 100644 core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilders.java create mode 100644 core/src/test/java/org/bdgenomics/cannoli/builder/CommandBuildersTest.java diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala index 5b1165f3..59f2f654 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala @@ -25,9 +25,11 @@ import org.bdgenomics.adam.rdd.feature.{ BEDInFormatter, BEDOutFormatter } +import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } +import scala.collection.JavaConversions._ /** * Bedtools function arguments. @@ -42,14 +44,20 @@ class BedtoolsFnArgs extends Args4jBase { @Args4jOption(required = false, name = "-sorted", usage = "Bedtools intersect -sorted option. Inputs must be sorted by chromosome and then by start position.") var sorted: Boolean = false - @Args4jOption(required = false, name = "-bedtools_path", usage = "Path to the Bedtools executable. 
Defaults to bedtools.") - var bedtoolsPath: String = "bedtools" + @Args4jOption(required = false, name = "-executable", usage = "Path to the Bedtools executable. Defaults to bedtools.") + var executable: String = "bedtools" - @Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. Defaults to quay.io/biocontainers/bedtools:2.27.1--0.") - var dockerImage: String = "quay.io/biocontainers/bedtools:2.27.1--0" + @Args4jOption(required = false, name = "-image", usage = "Container image to use. Defaults to quay.io/biocontainers/bedtools:2.27.1--0.") + var image: String = "quay.io/biocontainers/bedtools:2.27.1--0" - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Bedtools. If false, uses the Bedtools executable path.") + @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") + var sudo: Boolean = false + + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Bedtools.") var useDocker: Boolean = false + + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch Bedtools.") + var useSingularity: Boolean = false } /** @@ -91,28 +99,22 @@ class BedtoolsFn( require(optA.size + optB.size == 1, "Strictly one of {-a,-b} should be left unspecified to accept piped input.") - val bedtoolsCommand = if (args.useDocker) { - Seq("docker", - "run", - "--rm", - args.dockerImage, - "bedtools", - "intersect", - "-a", - optA.getOrElse("stdin"), - "-b", - optB.getOrElse("stdin"), - if (args.sorted) "-sorted" else "" - ) - } else { - Seq(args.bedtoolsPath, - "intersect", - "-a", - optA.getOrElse("stdin"), - "-b", - optB.getOrElse("stdin"), - if (args.sorted) "-sorted" else "" - ) + var builder = CommandBuilders.create(args.useDocker, args.useSingularity) + + // todo: wtf? scalac, yer killing me... 
+ builder = builder.setExecutable(args.executable) + builder = builder.addArgument("intersect") + builder = builder.add("-a") + builder = builder.add(optA.getOrElse("stdin")) + builder = builder.add("-b") + builder = builder.add(optB.getOrElse("stdin")) + + if (args.sorted) builder = builder.add("-sorted") + + // todo: add file in optA or optB to files and mounts? + if (args.useDocker || args.useSingularity) { + builder = builder.setImage(args.image) + builder = builder.setSudo(args.sudo) } log.info("Piping {} to bedtools with command: {} files: {} environment: {}", @@ -120,6 +122,7 @@ class BedtoolsFn( implicit val tFormatter = BEDInFormatter implicit val uFormatter = new BEDOutFormatter + features.pipe(bedtoolsCommand, files, environment) } } diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilders.java b/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilders.java new file mode 100644 index 00000000..73275429 --- /dev/null +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilders.java @@ -0,0 +1,49 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.cannoli.builder; + +import java.io.Serializable; + +/** + * Cannoli pipe command builders. 
+ */ +public final class CommandBuilders implements Serializable { + + /** + * Create a new Cannoli pipe command builder. + * + * @param command builder + * @param useDocker true to use Docker, + * useDocker and useSingularity must not both be true + * @param useSingularity true to use Singularity, + * useDocker and useSingularity must not both be true + * @return a new Cannoli pipe command builder + */ + public static CommandBuilder create(final boolean useDocker, final boolean useSingularity) { + if (useDocker && useSingularity) { + throw new IllegalArgumentException("useDocker and useSingularity must not both be true"); + } + if (useDocker) { + return ((CommandBuilder) new DockerBuilder()); + } + else if (useSingularity) { + return ((CommandBuilder) new SingularityBuilder()); + } + return ((CommandBuilder) new LocalBuilder()); + } +} diff --git a/core/src/test/java/org/bdgenomics/cannoli/builder/CommandBuildersTest.java b/core/src/test/java/org/bdgenomics/cannoli/builder/CommandBuildersTest.java new file mode 100644 index 00000000..a6a4ef08 --- /dev/null +++ b/core/src/test/java/org/bdgenomics/cannoli/builder/CommandBuildersTest.java @@ -0,0 +1,49 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.bdgenomics.cannoli.builder; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import org.junit.Test; + +/** + * Unit test for CommandBuilders. + */ +public final class CommandBuildersTest { + + @Test + public void testCreateLocalBuilder() { + assertFalse(CommandBuilders.create(false, false).setExecutable("foo").setSudo(true).build().contains("sudo")); + } + + @Test + public void testCreateDockerBuilder() { + assertTrue(CommandBuilders.create(true, false).setExecutable("foo").setImage("image").build().contains("docker")); + } + + @Test + public void testCreateSingularityBuilder() { + assertTrue(CommandBuilders.create(false, true).setExecutable("foo").setImage("image").build().contains("singularity")); + } + + @Test(expected=IllegalArgumentException.class) + public void testCreateBothTrue() { + CommandBuilders.create(true, true); + } +} From adb4535bb06321a8eb53e94fcbf75e7173ba8423 Mon Sep 17 00:00:00 2001 From: Michael Heuer Date: Tue, 20 Feb 2018 13:27:57 -0600 Subject: [PATCH 03/18] Remove extraneous type parameter. 
--- .../org/bdgenomics/cannoli/cli/Bedtools.scala | 21 +++-- .../cannoli/builder/CommandBuilder.java | 76 +++++++++---------- .../cannoli/builder/CommandBuilders.java | 9 +-- .../cannoli/builder/ContainerBuilder.java | 4 +- .../cannoli/builder/DockerBuilder.java | 2 +- .../cannoli/builder/LocalBuilder.java | 2 +- .../cannoli/builder/SingularityBuilder.java | 2 +- 7 files changed, 55 insertions(+), 61 deletions(-) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala index 59f2f654..964efdcd 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala @@ -100,21 +100,20 @@ class BedtoolsFn( "Strictly one of {-a,-b} should be left unspecified to accept piped input.") var builder = CommandBuilders.create(args.useDocker, args.useSingularity) + .setExecutable(args.executable) + .add("intersect") + .add("-a") + .add(optA.getOrElse("stdin")) + .add("-b") + .add(optB.getOrElse("stdin")) - // todo: wtf? scalac, yer killing me... - builder = builder.setExecutable(args.executable) - builder = builder.addArgument("intersect") - builder = builder.add("-a") - builder = builder.add(optA.getOrElse("stdin")) - builder = builder.add("-b") - builder = builder.add(optB.getOrElse("stdin")) - - if (args.sorted) builder = builder.add("-sorted") + if (args.sorted) builder.add("-sorted") // todo: add file in optA or optB to files and mounts? 
if (args.useDocker || args.useSingularity) { - builder = builder.setImage(args.image) - builder = builder.setSudo(args.sudo) + builder + .setImage(args.image) + .setSudo(args.sudo) } log.info("Piping {} to bedtools with command: {} files: {} environment: {}", diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilder.java index 405cd309..471c2049 100644 --- a/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilder.java +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilder.java @@ -36,10 +36,8 @@ /** * Abstract command builder. - * - * @param C command builder */ -abstract class CommandBuilder implements Serializable { +abstract class CommandBuilder implements Serializable { /** Command to run. */ private String executable; @@ -92,10 +90,10 @@ protected CommandBuilder(final String executable) { * @param executable executable, must not be null * @return this command builder */ - public final C setExecutable(final String executable) { + public final CommandBuilder setExecutable(final String executable) { checkNotNull(executable); this.executable = executable; - return (C) this; + return this; } /** @@ -104,9 +102,9 @@ public final C setExecutable(final String executable) { * @param flankSize number of bases to flank each command invocation by * @return this command builder */ - public final C setFlankSize(@Nullable final Integer flankSize) { + public final CommandBuilder setFlankSize(@Nullable final Integer flankSize) { this.flankSize = flankSize; - return (C) this; + return this; } /** @@ -115,10 +113,10 @@ public final C setFlankSize(@Nullable final Integer flankSize) { * @param image image, must not be null * @return this command builder */ - public final C setImage(final String image) { + public final CommandBuilder setImage(final String image) { checkNotNull(image); this.image = image; - return (C) this; + return this; } /** @@ -127,9 +125,9 @@ 
public final C setImage(final String image) { * @param sudo true to run via sudo * @return this command builder */ - public final C setSudo(final boolean sudo) { + public final CommandBuilder setSudo(final boolean sudo) { this.sudo = sudo; - return (C) this; + return this; } /** @@ -138,9 +136,9 @@ public final C setSudo(final boolean sudo) { * @param timeout how long to let a single partition run for, in seconds * @return this command builder */ - public final C setTimeout(@Nullable final Long timeout) { + public final CommandBuilder setTimeout(@Nullable final Long timeout) { this.timeout = timeout; - return (C) this; + return this; } /** @@ -150,10 +148,10 @@ public final C setTimeout(@Nullable final Long timeout) { * @param timeUnit time unit, must not be null * @return this command builder */ - public final C setTimeout(final long duration, final TimeUnit timeUnit) { + public final CommandBuilder setTimeout(final long duration, final TimeUnit timeUnit) { checkNotNull(timeUnit); this.timeout = timeUnit.toSeconds(duration); - return (C) this; + return this; } @@ -163,7 +161,7 @@ public final C setTimeout(final long duration, final TimeUnit timeUnit) { * @param arguments variable number of arguments to add, must not be null * @return this command builder */ - public final C add(final String... arguments) { + public final CommandBuilder add(final String... arguments) { return addArguments(arguments); } @@ -173,7 +171,7 @@ public final C add(final String... 
arguments) { * @param arguments arguments to add, must not be null * @return this command builder */ - public final C add(final Iterable arguments) { + public final CommandBuilder add(final Iterable arguments) { return addArguments(arguments); } @@ -183,10 +181,10 @@ public final C add(final Iterable arguments) { * @param argument argument to add, must not be null * @return this command builder */ - public final C addArgument(final String argument) { + public final CommandBuilder addArgument(final String argument) { checkNotNull(argument); arguments.add(argument); - return (C) this; + return this; } /** @@ -195,12 +193,12 @@ public final C addArgument(final String argument) { * @param arguments variable number of arguments to add, must not be null * @return this command builder */ - public final C addArguments(final String... arguments) { + public final CommandBuilder addArguments(final String... arguments) { checkNotNull(arguments); for (String argument : arguments) { this.arguments.add(argument); } - return (C) this; + return this; } /** @@ -209,12 +207,12 @@ public final C addArguments(final String... 
arguments) { * @param arguments arguments to add, must not be null * @return this command builder */ - public final C addArguments(final Iterable arguments) { + public final CommandBuilder addArguments(final Iterable arguments) { checkNotNull(arguments); for (String argument : arguments) { this.arguments.add(argument); } - return (C) this; + return this; } /** @@ -224,11 +222,11 @@ public final C addArguments(final Iterable arguments) { * @param value environment variable value to add, must not be null * @return this command builder */ - public final C addEnvironment(final String variable, final String value) { + public final CommandBuilder addEnvironment(final String variable, final String value) { checkNotNull(variable); checkNotNull(value); environment.put(variable, value); - return (C) this; + return this; } /** @@ -237,10 +235,10 @@ public final C addEnvironment(final String variable, final String value) { * @param environment environment variables to add, must not be null * @return this command builder */ - public final C addEnvironment(final Map environment) { + public final CommandBuilder addEnvironment(final Map environment) { checkNotNull(environment); this.environment.putAll(environment); - return (C) this; + return this; } /** @@ -249,10 +247,10 @@ public final C addEnvironment(final Map environment) { * @param file file to add, must not be null * @return this command builder */ - public final C addFile(final String file) { + public final CommandBuilder addFile(final String file) { checkNotNull(file); files.add(file); - return (C) this; + return this; } /** @@ -261,12 +259,12 @@ public final C addFile(final String file) { * @param files variable number of files to add, must not be null * @return this command builder */ - public final C addFiles(final String... files) { + public final CommandBuilder addFiles(final String... 
files) { checkNotNull(files); for (String file : files) { this.files.add(file); } - return (C) this; + return this; } /** @@ -275,12 +273,12 @@ public final C addFiles(final String... files) { * @param files files to add, must not be null * @return this command builder */ - public final C addFiles(final Iterable files) { + public final CommandBuilder addFiles(final Iterable files) { checkNotNull(files); for (String file : files) { this.files.add(file); } - return (C) this; + return this; } /** @@ -290,11 +288,11 @@ public final C addFiles(final Iterable files) { * @param target mount point target, must not be null * @return this command builder */ - public final C addMount(final String source, final String target) { + public final CommandBuilder addMount(final String source, final String target) { checkNotNull(source); checkNotNull(target); mounts.put(source, target); - return (C) this; + return this; } /** @@ -302,10 +300,10 @@ public final C addMount(final String source, final String target) { * * @param mounts mount points to add, must not be null */ - public final C addMounts(final Map mounts) { + public final CommandBuilder addMounts(final Map mounts) { checkNotNull(mounts); this.mounts.putAll(mounts); - return (C) this; + return this; } @@ -422,7 +420,7 @@ public final Map getMounts() { * * @return this command builder */ - public final C reset() { + public final CommandBuilder reset() { executable = null; flankSize = null; image = null; @@ -434,7 +432,7 @@ public final C reset() { files.clear(); mounts.clear(); - return (C) this; + return this; } /** diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilders.java b/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilders.java index 73275429..b8284207 100644 --- a/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilders.java +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilders.java @@ -27,23 +27,22 @@ public final class CommandBuilders implements 
Serializable { /** * Create a new Cannoli pipe command builder. * - * @param command builder * @param useDocker true to use Docker, * useDocker and useSingularity must not both be true * @param useSingularity true to use Singularity, * useDocker and useSingularity must not both be true * @return a new Cannoli pipe command builder */ - public static CommandBuilder create(final boolean useDocker, final boolean useSingularity) { + public static CommandBuilder create(final boolean useDocker, final boolean useSingularity) { if (useDocker && useSingularity) { throw new IllegalArgumentException("useDocker and useSingularity must not both be true"); } if (useDocker) { - return ((CommandBuilder) new DockerBuilder()); + return new DockerBuilder(); } else if (useSingularity) { - return ((CommandBuilder) new SingularityBuilder()); + return new SingularityBuilder(); } - return ((CommandBuilder) new LocalBuilder()); + return new LocalBuilder(); } } diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/ContainerBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/ContainerBuilder.java index 29ee8b61..fac8348b 100644 --- a/core/src/main/java/org/bdgenomics/cannoli/builder/ContainerBuilder.java +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/ContainerBuilder.java @@ -29,10 +29,8 @@ /** * Abstract container builder. - * - * @param C container builder */ -abstract class ContainerBuilder extends CommandBuilder { +abstract class ContainerBuilder extends CommandBuilder { /** * Create a new container builder. diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java index f396374f..9b0c9254 100644 --- a/core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java @@ -24,7 +24,7 @@ /** * Docker container builder. 
*/ -public final class DockerBuilder extends ContainerBuilder { +public final class DockerBuilder extends ContainerBuilder { /** * Create a new Docker command builder. diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/LocalBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/LocalBuilder.java index 7ab6e7fa..08a367f3 100644 --- a/core/src/main/java/org/bdgenomics/cannoli/builder/LocalBuilder.java +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/LocalBuilder.java @@ -23,7 +23,7 @@ /** * Local command builder. */ -public final class LocalBuilder extends CommandBuilder { +public final class LocalBuilder extends CommandBuilder { /** * Create a new local command builder. diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java index 3e20a636..c2acbd89 100644 --- a/core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java @@ -24,7 +24,7 @@ /** * Singularity container builder. */ -public final class SingularityBuilder extends ContainerBuilder { +public final class SingularityBuilder extends ContainerBuilder { /** * Create a new Singularity command builder. From 037103b848b1c936042b334de59e8b351a0ba761 Mon Sep 17 00:00:00 2001 From: Michael Heuer Date: Tue, 20 Feb 2018 14:24:45 -0600 Subject: [PATCH 04/18] Fix commands for Singularity builder. 
--- .../cannoli/builder/ContainerBuilder.java | 18 ++++++++- .../cannoli/builder/DockerBuilder.java | 8 +++- .../cannoli/builder/SingularityBuilder.java | 28 ++++++++++--- .../builder/SingularityBuilderTest.java | 40 ++++++++++++------- 4 files changed, 71 insertions(+), 23 deletions(-) diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/ContainerBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/ContainerBuilder.java index fac8348b..862ec83c 100644 --- a/core/src/main/java/org/bdgenomics/cannoli/builder/ContainerBuilder.java +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/ContainerBuilder.java @@ -74,6 +74,15 @@ protected ContainerBuilder(final String executable, final String image) { */ protected abstract List getRemoveArgument(); + /** + * Format the specified environment variable into a list of strings. + * + * @param variable variable + * @param value value + * @return the specified environment variable formatted into a list of strings. + */ + protected abstract List formatEnvironmentVariable(String variable, String value); + /** * Format the specified environment variable into a list of string arguments. * @@ -81,7 +90,7 @@ protected ContainerBuilder(final String executable, final String image) { * @param value value * @return the specified environment variable formatted into a list of string arguments. */ - protected abstract List formatEnvironment(String variable, String value); + protected abstract List formatEnvironmentArgument(String variable, String value); /** * Format the specified environment variable into a list of string arguments. 
@@ -111,6 +120,11 @@ public final List build() { List command = new ArrayList(); + // add environment variables + for (Map.Entry e : getEnvironment().entrySet()) { + command.addAll(formatEnvironmentVariable(e.getKey(), e.getValue())); + } + // add sudo if necessary if (isSudo()) { command.add("sudo"); @@ -121,7 +135,7 @@ public final List build() { // add environment arguments for (Map.Entry e : getEnvironment().entrySet()) { - command.addAll(formatEnvironment(e.getKey(), e.getValue())); + command.addAll(formatEnvironmentArgument(e.getKey(), e.getValue())); } // add mount arguments diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java index 9b0c9254..e6e66872 100644 --- a/core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java @@ -17,6 +17,7 @@ */ package org.bdgenomics.cannoli.builder; +import java.util.Collections; import java.util.List; import com.google.common.collect.ImmutableList; @@ -69,7 +70,12 @@ protected List formatImage(final String image) { } @Override - protected List formatEnvironment(final String variable, final String value) { + protected List formatEnvironmentVariable(final String variable, final String value) { + return Collections.emptyList(); + } + + @Override + protected List formatEnvironmentArgument(final String variable, final String value) { return ImmutableList.of("--env", variable + "=" + value); } diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java index c2acbd89..c2f236a3 100644 --- a/core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java @@ -17,14 +17,19 @@ */ package org.bdgenomics.cannoli.builder; +import java.util.Collections; import 
java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + import com.google.common.collect.ImmutableList; /** * Singularity container builder. */ public final class SingularityBuilder extends ContainerBuilder { + private static final Pattern SCHEME = Pattern.compile("^[^/:\\. ]+://(.*)"); /** * Create a new Singularity command builder. @@ -61,22 +66,33 @@ protected List getContainerCommands() { @Override protected List getRemoveArgument() { - return ImmutableList.of("--rm"); // todo: can't find such + return Collections.emptyList(); + } + + @Override + protected List formatEnvironmentVariable(final String variable, final String value) { + return ImmutableList.of("SINGULARITYENV_" + variable + "=" + value); } @Override - protected List formatEnvironment(final String variable, final String value) { - return ImmutableList.of("--env", "SINGULARITYENV_" + variable + "=" + value); // todo: --env ? + protected List formatEnvironmentArgument(final String variable, final String value) { + return Collections.emptyList(); } @Override protected List formatImage(final String image) { - // todo: add docker:// if necessary - return ImmutableList.of(image); + if (image.startsWith("/")) { + return ImmutableList.of(image); + } + Matcher m = SCHEME.matcher(image); + if (m.matches()) { + return ImmutableList.of(image); + } + return ImmutableList.of("docker://" + image); } @Override protected List formatMount(final String source, final String target) { - return ImmutableList.of("-B", source + ":" + target); // todo: --long-arg for -B ? 
/mnt issue + return ImmutableList.of("--bind", source + ":" + target); // todo: /mnt issue } } diff --git a/core/src/test/java/org/bdgenomics/cannoli/builder/SingularityBuilderTest.java b/core/src/test/java/org/bdgenomics/cannoli/builder/SingularityBuilderTest.java index 59be5907..87552c06 100644 --- a/core/src/test/java/org/bdgenomics/cannoli/builder/SingularityBuilderTest.java +++ b/core/src/test/java/org/bdgenomics/cannoli/builder/SingularityBuilderTest.java @@ -74,19 +74,31 @@ public void testBuild() { assertEquals("/target", builder.getMounts().get("/source")); List command = builder.build(); - /* - assertEquals(11, command.size()); - assertEquals("sudo", command.get(0)); - assertEquals("singularity", command.get(1)); - assertEquals("exec", command.get(2)); - assertEquals("--env", command.get(3)); - assertEquals("VARIABLE=value", command.get(4)); - assertEquals("-B", command.get(5)); - assertEquals("type=bind,source=/source,target=/target", command.get(6)); - assertEquals("--rm", command.get(7)); - assertEquals("docker://image", command.get(8)); - assertEquals("foo", command.get(9)); - assertEquals("--help", command.get(10)); - */ + + assertEquals(9, command.size()); + assertEquals("SINGULARITYENV_VARIABLE=value", command.get(0)); + assertEquals("sudo", command.get(1)); + assertEquals("singularity", command.get(2)); + assertEquals("exec", command.get(3)); + assertEquals("--bind", command.get(4)); + assertEquals("/source:/target", command.get(5)); + assertEquals("docker://image", command.get(6)); + assertEquals("foo", command.get(7)); + assertEquals("--help", command.get(8)); + } + + @Test + public void testImage() { + assertTrue(new SingularityBuilder("foo", "/image").build().contains("/image")); + } + + @Test + public void testShubImage() { + assertTrue(new SingularityBuilder("foo", "shub://image").build().contains("shub://image")); + } + + @Test + public void testDockerImage() { + assertTrue(new SingularityBuilder("foo", 
"image").build().contains("docker://image")); } } From 2e1b693e5fba3762eaf8d780a3cd8dbc0975f43f Mon Sep 17 00:00:00 2001 From: Michael Heuer Date: Fri, 23 Feb 2018 23:02:09 -0600 Subject: [PATCH 05/18] Add files and root mounts. --- .../org/bdgenomics/cannoli/cli/Bedtools.scala | 25 ++++++++++++++++--- .../cannoli/builder/CommandBuilder.java | 12 ++++++++- .../cannoli/builder/SingularityBuilder.java | 2 +- .../cannoli/builder/DockerBuilderTest.java | 17 ++++++++++++- .../builder/SingularityBuilderTest.java | 10 ++++++++ 5 files changed, 59 insertions(+), 7 deletions(-) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala index 964efdcd..69fc4bc9 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala @@ -17,6 +17,7 @@ */ package org.bdgenomics.cannoli.cli +import org.apache.hadoop.fs.{ FileSystem, Path } import org.apache.spark.SparkContext import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs @@ -53,6 +54,9 @@ class BedtoolsFnArgs extends Args4jBase { @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") var sudo: Boolean = false + @Args4jOption(required = false, name = "-add_files", usage = "If true, use the SparkFiles mechanism to distribute files to executors.") + var addFiles: Boolean = false + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Bedtools.") var useDocker: Boolean = false @@ -99,21 +103,30 @@ class BedtoolsFn( require(optA.size + optB.size == 1, "Strictly one of {-a,-b} should be left unspecified to accept piped input.") + val file = List(optA, optB).flatten.get(0) + + def root(): String = { + val path = new Path(file) + val fs = path.getFileSystem(sc.hadoopConfiguration) + Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path).getParent()).toString + } + var builder = 
CommandBuilders.create(args.useDocker, args.useSingularity) .setExecutable(args.executable) .add("intersect") .add("-a") - .add(optA.getOrElse("stdin")) + .add(optA.fold("stdin")(if (args.addFiles) "$0" else _)) .add("-b") - .add(optB.getOrElse("stdin")) + .add(optB.fold("stdin")(if (args.addFiles) "$0" else _)) if (args.sorted) builder.add("-sorted") + if (args.addFiles) builder.addFile(file) - // todo: add file in optA or optB to files and mounts? if (args.useDocker || args.useSingularity) { builder .setImage(args.image) .setSudo(args.sudo) + .addMount(if (args.addFiles) "$root" else root()) } log.info("Piping {} to bedtools with command: {} files: {} environment: {}", @@ -122,7 +135,11 @@ class BedtoolsFn( implicit val tFormatter = BEDInFormatter implicit val uFormatter = new BEDOutFormatter - features.pipe(bedtoolsCommand, files, environment) + features.pipe( + cmd = bedtoolsCommand, + files = files, + environment = environment + ) } } diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilder.java index 471c2049..353fd26c 100644 --- a/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilder.java +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilder.java @@ -37,7 +37,7 @@ /** * Abstract command builder. */ -abstract class CommandBuilder implements Serializable { +public abstract class CommandBuilder implements Serializable { /** Command to run. */ private String executable; @@ -281,6 +281,16 @@ public final CommandBuilder addFiles(final Iterable files) { return this; } + /** + * Add the specified mount point to the map of mount points for this command builder. 
+ * + * @param source mount point source and target, must not be null + * @return this command builder + */ + public final CommandBuilder addMount(final String mount) { + return addMount(mount, mount); + } + /** * Add the specified mount point to the map of mount points for this command builder. * diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java index c2f236a3..b44ceb01 100644 --- a/core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java @@ -93,6 +93,6 @@ protected List formatImage(final String image) { @Override protected List formatMount(final String source, final String target) { - return ImmutableList.of("--bind", source + ":" + target); // todo: /mnt issue + return ImmutableList.of("--bind", source.equals(target) ? source : source + ":" + target); // todo: /mnt issue } } diff --git a/core/src/test/java/org/bdgenomics/cannoli/builder/DockerBuilderTest.java b/core/src/test/java/org/bdgenomics/cannoli/builder/DockerBuilderTest.java index 16689aee..e3265865 100644 --- a/core/src/test/java/org/bdgenomics/cannoli/builder/DockerBuilderTest.java +++ b/core/src/test/java/org/bdgenomics/cannoli/builder/DockerBuilderTest.java @@ -50,7 +50,12 @@ public void setImageNull() { } @Test(expected=NullPointerException.class) - public void setMountNullSource() { + public void addMountNull() { + builder.addMount(null); + } + + @Test(expected=NullPointerException.class) + public void addMountNullSource() { builder.addMount(null, "/target"); } @@ -156,4 +161,14 @@ public void testBuild() { assertEquals("foo", command.get(9)); assertEquals("--help", command.get(10)); } + + @Test + public void testBuildMount() { + builder + .setExecutable("foo") + .setImage("image") + .addMount("/mount"); + + assertTrue(builder.build().contains("type=bind,source=/mount,target=/mount")); + } } 
diff --git a/core/src/test/java/org/bdgenomics/cannoli/builder/SingularityBuilderTest.java b/core/src/test/java/org/bdgenomics/cannoli/builder/SingularityBuilderTest.java index 87552c06..7b0ca9bb 100644 --- a/core/src/test/java/org/bdgenomics/cannoli/builder/SingularityBuilderTest.java +++ b/core/src/test/java/org/bdgenomics/cannoli/builder/SingularityBuilderTest.java @@ -101,4 +101,14 @@ public void testShubImage() { public void testDockerImage() { assertTrue(new SingularityBuilder("foo", "image").build().contains("docker://image")); } + + @Test + public void testBuildMount() { + builder + .setExecutable("foo") + .setImage("image") + .addMount("/mount"); + + assertTrue(builder.build().contains("/mount")); + } } From d5ba7323a13b50ba47e48b826d064282f6e9b257 Mon Sep 17 00:00:00 2001 From: Michael Heuer Date: Mon, 26 Feb 2018 13:55:59 -0600 Subject: [PATCH 06/18] Use -i and -v for Docker. --- .../cannoli/builder/DockerBuilder.java | 4 ++-- .../cannoli/builder/DockerBuilderTest.java | 21 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java index e6e66872..9bfa7d1e 100644 --- a/core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java @@ -56,7 +56,7 @@ public DockerBuilder(final String executable, final String image) { @Override protected List getContainerCommands() { - return ImmutableList.of("docker", "run"); + return ImmutableList.of("docker", "run", "-i"); } @Override @@ -81,6 +81,6 @@ protected List formatEnvironmentArgument(final String variable, final St @Override protected List formatMount(final String source, final String target) { - return ImmutableList.of("--mount", "type=bind,source=" + source + ",target=" + target); + return ImmutableList.of("-v", source + ":" + target); } } diff --git 
a/core/src/test/java/org/bdgenomics/cannoli/builder/DockerBuilderTest.java b/core/src/test/java/org/bdgenomics/cannoli/builder/DockerBuilderTest.java index e3265865..3012679b 100644 --- a/core/src/test/java/org/bdgenomics/cannoli/builder/DockerBuilderTest.java +++ b/core/src/test/java/org/bdgenomics/cannoli/builder/DockerBuilderTest.java @@ -148,18 +148,19 @@ public void testBuild() { assertEquals("/target", builder.getMounts().get("/source")); List command = builder.build(); - assertEquals(11, command.size()); + assertEquals(12, command.size()); assertEquals("sudo", command.get(0)); assertEquals("docker", command.get(1)); assertEquals("run", command.get(2)); - assertEquals("--env", command.get(3)); - assertEquals("VARIABLE=value", command.get(4)); - assertEquals("--mount", command.get(5)); - assertEquals("type=bind,source=/source,target=/target", command.get(6)); - assertEquals("--rm", command.get(7)); - assertEquals("image", command.get(8)); - assertEquals("foo", command.get(9)); - assertEquals("--help", command.get(10)); + assertEquals("-i", command.get(3)); + assertEquals("--env", command.get(4)); + assertEquals("VARIABLE=value", command.get(5)); + assertEquals("-v", command.get(6)); + assertEquals("/source:/target", command.get(7)); + assertEquals("--rm", command.get(8)); + assertEquals("image", command.get(9)); + assertEquals("foo", command.get(10)); + assertEquals("--help", command.get(11)); } @Test @@ -169,6 +170,6 @@ public void testBuildMount() { .setImage("image") .addMount("/mount"); - assertTrue(builder.build().contains("type=bind,source=/mount,target=/mount")); + assertTrue(builder.build().contains("/mount:/mount")); } } From 4fb9bb51f0c5e668e0f47ab823695ca14d5c5eff Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Tue, 27 Feb 2018 12:00:51 -0600 Subject: [PATCH 07/18] Fixup after rebase. 
--- .../org/bdgenomics/cannoli/cli/Bedtools.scala | 31 +++++-------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala index 69fc4bc9..d6621ea7 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala @@ -73,29 +73,15 @@ class BedtoolsFnArgs extends Args4jBase { * args.b = "foo.bed" * args.useDocker = true * val features = ... - * val pipedFeatures = new BedtoolsFn(args).apply(features) + * val pipedFeatures = new BedtoolsFn(args, sc).apply(features) * * * @param args Bedtools function arguments. - * @param files Files to make locally available to the commands being run. - * @param environment A map containing environment variable/value pairs to set - * in the environment for the newly created process. + * @param sc Spark context. */ class BedtoolsFn( val args: BedtoolsFnArgs, - val files: Seq[String], - val environment: Map[String, String]) extends Function1[FeatureRDD, FeatureRDD] with Logging { - - /** - * @param args Bedtools function arguments. - */ - def this(args: BedtoolsFnArgs) = this(args, Seq.empty, Map.empty) - - /** - * @param args Bedtools function arguments. - * @param files Files to make locally available to the commands being run. 
- */ - def this(args: BedtoolsFnArgs, files: Seq[String]) = this(args, files, Map.empty) + val sc: SparkContext) extends Function1[FeatureRDD, FeatureRDD] with Logging { override def apply(features: FeatureRDD): FeatureRDD = { val optA = Option(args.a) @@ -129,16 +115,15 @@ class BedtoolsFn( .addMount(if (args.addFiles) "$root" else root()) } - log.info("Piping {} to bedtools with command: {} files: {} environment: {}", - Array(features, bedtoolsCommand, files, environment)) + log.info("Piping {} to bedtools with command: {} files: {}", + Array(features, builder.build(), builder.getFiles())) implicit val tFormatter = BEDInFormatter implicit val uFormatter = new BEDOutFormatter features.pipe( - cmd = bedtoolsCommand, - files = files, - environment = environment + cmd = builder.build(), + files = builder.getFiles() ) } } @@ -183,7 +168,7 @@ class Bedtools(protected val args: BedtoolsArgs) extends BDGSparkCommand[Bedtool override def run(sc: SparkContext) { val features = sc.loadFeatures(args.inputPath) - val pipedFeatures = new BedtoolsFn(args).apply(features) + val pipedFeatures = new BedtoolsFn(args, sc).apply(features) pipedFeatures.save(args.outputPath, asSingleFile = args.asSingleFile, From 438d25cef834fe95cf36cb6507b3940502523579 Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Tue, 27 Feb 2018 12:35:26 -0600 Subject: [PATCH 08/18] Add partitions and limit projection arguments, improve usage doc. 
--- .../org/bdgenomics/cannoli/cli/Bedtools.scala | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala index d6621ea7..160b8fde 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala @@ -19,6 +19,7 @@ package org.bdgenomics.cannoli.cli import org.apache.hadoop.fs.{ FileSystem, Path } import org.apache.spark.SparkContext +import org.bdgenomics.adam.projections.{ FeatureField, Projection } import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.feature.{ @@ -141,12 +142,18 @@ object Bedtools extends BDGCommandCompanion { * Bedtools command line arguments. */ class BedtoolsArgs extends BedtoolsFnArgs with ADAMSaveAnyArgs with ParquetArgs { - @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe from.", index = 0) + @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe features from (e.g., .bed, .gff/.gtf, .gff3, .interval_list, .narrowPeak). If extension is not detected, Parquet is assumed.", index = 0) var inputPath: String = null - @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe to.", index = 1) + @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe features to. 
If extension is not detected, Parquet is assumed.", index = 1) var outputPath: String = null + @Args4jOption(required = false, name = "-limit_projection", usage = "If input is Parquet, limit to BED format-only fields by projection.") + var limitProjection: Boolean = false + + @Args4jOption(required = false, name = "-partitions", usage = "Number of partitions to use when loading a text file.") + var partitions: Int = _ + @Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT as single file.") var asSingleFile: Boolean = false @@ -167,7 +174,20 @@ class Bedtools(protected val args: BedtoolsArgs) extends BDGSparkCommand[Bedtool val companion = Bedtools override def run(sc: SparkContext) { - val features = sc.loadFeatures(args.inputPath) + val projection = Projection( + FeatureField.contigName, + FeatureField.start, + FeatureField.end, + FeatureField.name, + FeatureField.score, + FeatureField.strand + ) + + val features = sc.loadFeatures( + args.inputPath, + optMinPartitions = Option(args.partitions), + optProjection = if (args.limitProjection) Some(projection) else None + ) val pipedFeatures = new BedtoolsFn(args, sc).apply(features) pipedFeatures.save(args.outputPath, From 0c083822a7e13de494e9e1b78908ddcee4e8f552 Mon Sep 17 00:00:00 2001 From: Michael Heuer Date: Tue, 27 Feb 2018 15:49:40 -0600 Subject: [PATCH 09/18] Update Freebayes to use command builder. 
--- .../bdgenomics/cannoli/cli/Freebayes.scala | 121 +++++++++++------- 1 file changed, 77 insertions(+), 44 deletions(-) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala index 2e55c8d5..c3c91e7c 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala @@ -19,6 +19,7 @@ package org.bdgenomics.cannoli.cli import htsjdk.samtools.ValidationStringency import htsjdk.variant.vcf.VCFHeaderLine +import org.apache.hadoop.fs.{ FileSystem, Path } import org.apache.spark.SparkContext import org.apache.spark.util.CollectionAccumulator import org.bdgenomics.adam.models.VariantContext @@ -26,6 +27,7 @@ import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, BAMInFormatter } import org.bdgenomics.adam.rdd.variant.{ VariantContextRDD, VCFOutFormatter } +import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } @@ -35,17 +37,32 @@ import scala.collection.JavaConversions._ * Freebayes function arguments. */ class FreebayesFnArgs extends Args4jBase { - @Args4jOption(required = false, name = "-freebayes_path", usage = "Path to the Freebayes executable. Defaults to freebayes.") - var freebayesPath: String = "freebayes" + @Args4jOption(required = false, name = "-executable", usage = "Path to the Freebayes executable. Defaults to freebayes.") + var executable: String = "freebayes" - @Args4jOption(required = true, name = "-freebayes_reference", usage = "Reference sequence for analysis. An index file (.fai) will be created if none exists.") - var referencePath: String = null + @Args4jOption(required = false, name = "-image", usage = "Container image to use. 
Defaults to quay.io/biocontainers/freebayes:1.1.0.46--htslib1.6_2") + var image: String = "quay.io/biocontainers/freebayes:1.1.0.46--htslib1.6_2" + + @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") + var sudo: Boolean = false - @Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. Defaults to quay.io/biocontainers/freebayes:1.1.0.46--htslib1.6_2.") - var dockerImage: String = "quay.io/biocontainers/freebayes:1.1.0.46--htslib1.6_2" + @Args4jOption(required = false, name = "-add_files", usage = "If true, use the SparkFiles mechanism to distribute files to executors.") + var addFiles: Boolean = false - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Freebayes. If false, uses the Freebayes executable path.") + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Freebayes.") var useDocker: Boolean = false + + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch Freebayes.") + var useSingularity: Boolean = false + + @Args4jOption(required = true, name = "-fasta_reference", usage = "Reference sequence for analysis. An index file (.fai) will be created if none exists.") + var referencePath: String = null + + @Args4jOption(required = false, name = "-gvcf", usage = "Write gVCF output or equivalent genotypes which indicate coverage in uncalled regions.") + var gvcf: Boolean = false + + @Args4jOption(required = false, name = "-gvcf_chunk", usage = "When writing gVCF output or equivalent genotypes emit a record for every N bases.") + var gvcfChunk: Int = _ } /** @@ -53,57 +70,55 @@ class FreebayesFnArgs extends Args4jBase { * for use in cannoli-shell or notebooks. * * @param args Freebayes function arguments. - * @param files Files to make locally available to the commands being run. 
- * @param environment A map containing environment variable/value pairs to set - * in the environment for the newly created process. * @param sc Spark context. */ class FreebayesFn( val args: FreebayesFnArgs, - val files: Seq[String], - val environment: Map[String, String], val sc: SparkContext) extends Function1[AlignmentRecordRDD, VariantContextRDD] with Logging { - /** - * @param args Freebayes function arguments. - * @param sc Spark context. - */ - def this(args: FreebayesFnArgs, sc: SparkContext) = this(args, Seq.empty, Map.empty, sc) + override def apply(alignments: AlignmentRecordRDD): VariantContextRDD = { + def root(): String = { + val path = new Path(args.referencePath) + val fs = path.getFileSystem(sc.hadoopConfiguration) + Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path).getParent()).toString + } - /** - * @param args Freebayes function arguments. - * @param files Files to make locally available to the commands being run. - * @param sc Spark context. - */ - def this(args: FreebayesFnArgs, files: Seq[String], sc: SparkContext) = this(args, files, Map.empty, sc) + var builder = CommandBuilders.create(args.useDocker, args.useSingularity) + .setExecutable(args.executable) + .add("--fasta-reference") + .add(if (args.addFiles) "$0" else args.referencePath) + .add("--stdin") + .add("--strict-vcf") - override def apply(alignments: AlignmentRecordRDD): VariantContextRDD = { + if (args.gvcf) { + builder.add("--gvcf") + Option(args.gvcfChunk).foreach(i => builder.add("--gvcf-chunk").add(i.toString)) + } - val freebayesCommand = if (args.useDocker) { - Seq("docker", - "run", - "--rm", - args.dockerImage, - "freebayes", - "--fasta-reference", - args.referencePath, - "--stdin") - } else { - Seq(args.freebayesPath, - "--fasta-reference", - args.referencePath, - "--stdin") + if (args.addFiles) { + builder.addFile(args.referencePath) + builder.addFile(args.referencePath + ".fai") } - log.info("Piping {} to freebayes with command: {} files: {} environment: 
{}", - Array(alignments, freebayesCommand, files, environment)) + if (args.useDocker || args.useSingularity) { + builder + .setImage(args.image) + .setSudo(args.sudo) + .addMount(if (args.addFiles) "$root" else root()) + } + + log.info("Piping {} to freebayes with command: {} files: {}", + Array(alignments, builder.build(), builder.getFiles())) val accumulator: CollectionAccumulator[VCFHeaderLine] = sc.collectionAccumulator("headerLines") implicit val tFormatter = BAMInFormatter implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration, Some(accumulator)) - val variantContexts = alignments.pipe[VariantContext, VariantContextRDD, BAMInFormatter](freebayesCommand, files, environment) + val variantContexts = alignments.pipe[VariantContext, VariantContextRDD, BAMInFormatter]( + cmd = builder.build(), + files = builder.getFiles() + ) val headerLines = accumulator.value.distinct variantContexts.replaceHeaderLines(headerLines) @@ -123,10 +138,10 @@ object Freebayes extends BDGCommandCompanion { * Freebayes command line arguments. */ class FreebayesArgs extends FreebayesFnArgs with ADAMSaveAnyArgs with ParquetArgs { - @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe from.", index = 0) + @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe alignment records from (e.g. .bam, .cram, .sam). If extension is not detected, Parquet is assumed.", index = 0) var inputPath: String = null - @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe to, in VCF format.", index = 1) + @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe genotypes to (e.g. .vcf, .vcf.gz, .vcf.bgz). 
If extension is not detected, Parquet is assumed.", index = 1) var outputPath: String = null @Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT as single file.") @@ -152,9 +167,27 @@ class Freebayes(protected val args: FreebayesArgs) extends BDGSparkCommand[Freeb val companion = Freebayes val stringency: ValidationStringency = ValidationStringency.valueOf(args.stringency) + // todo: org.bdgenomics.adam.util.FileExtensions is inaccessible + def isVcfExt(pathName: String): Boolean = { + pathName.endsWith(".vcf") || + pathName.endsWith(".vcf.gz") || + pathName.endsWith(".vcf.bgz") + } + def run(sc: SparkContext) { val alignments = sc.loadAlignments(args.inputPath, stringency = stringency) val variantContexts = new FreebayesFn(args, sc).apply(alignments) - variantContexts.saveAsVcf(args, stringency) + + if (isVcfExt(args.outputPath)) { + variantContexts.saveAsVcf( + args.inputPath, + asSingleFile = args.asSingleFile, + deferMerging = args.deferMerging, + disableFastConcat = args.disableFastConcat, + stringency + ) + } else { + variantContexts.toGenotypes.saveAsParquet(args) + } } } From daf5b9ce1b086dbe963d08a54e801e2a22c68c51 Mon Sep 17 00:00:00 2001 From: Michael Heuer Date: Thu, 1 Mar 2018 10:36:34 -0600 Subject: [PATCH 10/18] Update remaining commands to use command builder. 
--- .../org/bdgenomics/cannoli/cli/Bcftools.scala | 87 ++++++------- .../org/bdgenomics/cannoli/cli/Bowtie.scala | 96 ++++++++------- .../org/bdgenomics/cannoli/cli/Bowtie2.scala | 96 ++++++++------- .../org/bdgenomics/cannoli/cli/Bwa.scala | 112 +++++++---------- .../bdgenomics/cannoli/cli/Freebayes.scala | 10 +- .../org/bdgenomics/cannoli/cli/Samtools.scala | 116 ++++++++++-------- .../org/bdgenomics/cannoli/cli/SnpEff.scala | 70 +++++------ .../scala/org/bdgenomics/cannoli/cli/Vt.scala | 92 +++++++------- 8 files changed, 336 insertions(+), 343 deletions(-) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala index 7a6196ff..fa5ec665 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala @@ -18,6 +18,7 @@ package org.bdgenomics.cannoli.cli import htsjdk.samtools.ValidationStringency +import org.apache.hadoop.fs.{ FileSystem, Path } import org.apache.spark.SparkContext import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs @@ -26,25 +27,36 @@ import org.bdgenomics.adam.rdd.variant.{ VCFInFormatter, VCFOutFormatter } +import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } +import scala.collection.JavaConversions._ /** * Bcftools function arguments. */ class BcftoolsFnArgs extends Args4jBase { - @Args4jOption(required = false, name = "-bcftools_path", usage = "Path to the BCFtools executable. Defaults to bcftools.") - var bcftoolsPath: String = "bcftools" + @Args4jOption(required = false, name = "-executable", usage = "Path to the BCFtools executable. Defaults to bcftools.") + var executable: String = "bcftools" - @Args4jOption(required = true, name = "-bcftools_reference", usage = "Reference sequence for analysis. 
An index file (.fai) will be created if none exists.") - var referencePath: String = null + @Args4jOption(required = false, name = "-image", usage = "Container image to use. Defaults to quay.io/biocontainers/bcftools:1.6--0.") + var image: String = "quay.io/biocontainers/bcftools:1.6--0" + + @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") + var sudo: Boolean = false - @Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. Defaults to quay.io/biocontainers/bcftools:1.6--0.") - var dockerImage: String = "quay.io/biocontainers/bcftools:1.6--0" + @Args4jOption(required = false, name = "-add_files", usage = "If true, use the SparkFiles mechanism to distribute files to executors.") + var addFiles: Boolean = false - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch BCFtools. If false, uses the BCFtools executable path.") + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch BCFtools.") var useDocker: Boolean = false + + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch BCFtools.") + var useSingularity: Boolean = false + + @Args4jOption(required = true, name = "-reference", usage = "Reference sequence for analysis. An index file (.fai) will be created if none exists.") + var referencePath: String = null } /** @@ -52,56 +64,47 @@ class BcftoolsFnArgs extends Args4jBase { * for use in cannoli-shell or notebooks. * * @param args Bcftools function arguments. - * @param files Files to make locally available to the commands being run. - * @param environment A map containing environment variable/value pairs to set - * in the environment for the newly created process. * @param sc Spark context. 
*/ class BcftoolsFn( val args: BcftoolsFnArgs, - val files: Seq[String], - val environment: Map[String, String], val sc: SparkContext) extends Function1[VariantContextRDD, VariantContextRDD] with Logging { - /** - * @param args Bcftools function arguments. - * @param sc Spark context. - */ - def this(args: BcftoolsFnArgs, sc: SparkContext) = this(args, Seq.empty, Map.empty, sc) + override def apply(variantContexts: VariantContextRDD): VariantContextRDD = { + def root(): String = { + val path = new Path(args.referencePath) + val fs = path.getFileSystem(sc.hadoopConfiguration) + Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path).getParent()).toString + } - /** - * @param args Bcftools function arguments. - * @param files Files to make locally available to the commands being run. - * @param sc Spark context. - */ - def this(args: BcftoolsFnArgs, files: Seq[String], sc: SparkContext) = this(args, files, Map.empty, sc) + val builder = CommandBuilders.create(args.useDocker, args.useSingularity) + .setExecutable(args.executable) + .add("norm") + .add("--fasta-ref") + .add(if (args.addFiles) "$0" else args.referencePath) - override def apply(variantContexts: VariantContextRDD): VariantContextRDD = { + if (args.addFiles) { + builder.addFile(args.referencePath) + builder.addFile(args.referencePath + ".fai") + } - val bcftoolsCommand = if (args.useDocker) { - Seq("docker", - "run", - "--interactive", - "--rm", - args.dockerImage, - "bcftools", - "norm", - "--fasta-ref", - args.referencePath) - } else { - Seq(args.bcftoolsPath, - "norm", - "--fasta-ref", - args.referencePath) + if (args.useDocker || args.useSingularity) { + builder + .setImage(args.image) + .setSudo(args.sudo) + .addMount(if (args.addFiles) "$root" else root()) } - log.info("Piping {} to bcftools with command: {} files: {} environment: {}", - Array(variantContexts, bcftoolsCommand, files, environment)) + log.info("Piping {} to bcftools with command: {} files: {}", + Array(variantContexts, 
builder.build(), builder.getFiles())) implicit val tFormatter = VCFInFormatter implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration) - variantContexts.pipe(bcftoolsCommand, files, environment) + variantContexts.pipe( + cmd = builder.build(), + files = builder.getFiles() + ) } } diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala index 62c73f5b..890050e7 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala @@ -18,29 +18,41 @@ package org.bdgenomics.cannoli.cli import htsjdk.samtools.ValidationStringency +import org.apache.hadoop.fs.{ FileSystem, Path } import org.apache.spark.SparkContext import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.fragment.{ FragmentRDD, InterleavedFASTQInFormatter } import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, AnySAMOutFormatter } +import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } +import scala.collection.JavaConversions._ /** * Bowtie function arguments. */ class BowtieFnArgs extends Args4jBase { - @Args4jOption(required = false, name = "-bowtie_path", usage = "Path to the Bowtie executable. Defaults to bowtie.") - var bowtiePath: String = "bowtie" + @Args4jOption(required = false, name = "-executable", usage = "Path to the Bowtie executable. Defaults to bowtie.") + var executable: String = "bowtie" - @Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. Defaults to quay.io/biocontainers/bowtie:1.2.1.1--py27pl5.22.0_0.") - var dockerImage: String = "quay.io/biocontainers/bowtie:1.2.1.1--py27pl5.22.0_0" + @Args4jOption(required = false, name = "-image", usage = "Container image to use. 
Defaults to quay.io/biocontainers/bowtie:1.2.1.1--py27pl5.22.0_0") + var image: String = "quay.io/biocontainers/bowtie:1.2.1.1--py27pl5.22.0_0" - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Bowtie. If false, uses the Bowtie executable path.") + @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") + var sudo: Boolean = false + + @Args4jOption(required = false, name = "-add_files", usage = "If true, use the SparkFiles mechanism to distribute files to executors.") + var addFiles: Boolean = false + + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Bowtie.") var useDocker: Boolean = false - @Args4jOption(required = true, name = "-bowtie_index", usage = "Basename of the bowtie index to be searched, e.g. in bowtie [options]* ...") + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch Bowtie.") + var useSingularity: Boolean = false + + @Args4jOption(required = true, name = "-index", usage = "Basename of the bowtie index to be searched, e.g. in bowtie [options]* ...") var indexPath: String = null } @@ -49,56 +61,48 @@ class BowtieFnArgs extends Args4jBase { * for use in cannoli-shell or notebooks. * * @param args Bowtie function arguments. - * @param files Files to make locally available to the commands being run. - * @param environment A map containing environment variable/value pairs to set - * in the environment for the newly created process. + * @param sc Spark context. */ class BowtieFn( val args: BowtieFnArgs, - val files: Seq[String], - val environment: Map[String, String]) extends Function1[FragmentRDD, AlignmentRecordRDD] with Logging { + val sc: SparkContext) extends Function1[FragmentRDD, AlignmentRecordRDD] with Logging { - /** - * @param args Bowtie function arguments. 
- */ - def this(args: BowtieFnArgs) = this(args, Seq.empty, Map.empty) + override def apply(fragments: FragmentRDD): AlignmentRecordRDD = { + def root(): String = { + val path = new Path(args.indexPath) + val fs = path.getFileSystem(sc.hadoopConfiguration) + Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path).getParent()).toString + } - /** - * @param args Bowtie function arguments. - * @param files Files to make locally available to the commands being run. - */ - def this(args: BowtieFnArgs, files: Seq[String]) = this(args, files, Map.empty) + val builder = CommandBuilders.create(args.useDocker, args.useSingularity) + .setExecutable(args.executable) + .add("-S") + .add(if (args.addFiles) "$0" else args.indexPath) + .add("--interleaved") + .add("-") - override def apply(fragments: FragmentRDD): AlignmentRecordRDD = { + if (args.addFiles) { + builder.addFile(args.indexPath) + // todo: add all index files matching *.ebwt + } - val bowtieCommand = if (args.useDocker) { - Seq("docker", - "run", - "--interactive", - "--rm", - args.dockerImage, - "bowtie", - "-S", - args.indexPath, - "--interleaved", - "-" - ) - } else { - Seq(args.bowtiePath, - "-S", - args.indexPath, - "--interleaved", - "-" - ) + if (args.useDocker || args.useSingularity) { + builder + .setImage(args.image) + .setSudo(args.sudo) + .addMount(if (args.addFiles) "$root" else root()) } - log.info("Piping {} to bowtie with command: {} files: {} environment: {}", - Array(fragments, bowtieCommand, files, environment)) + log.info("Piping {} to bowtie with command: {} files: {}", + Array(fragments, builder.build(), builder.getFiles())) implicit val tFormatter = InterleavedFASTQInFormatter implicit val uFormatter = new AnySAMOutFormatter - fragments.pipe(bowtieCommand, files, environment) + fragments.pipe( + cmd = builder.build(), + files = builder.getFiles() + ) } } @@ -115,10 +119,10 @@ object Bowtie extends BDGCommandCompanion { * Bowtie command line arguments. 
*/ class BowtieArgs extends BowtieFnArgs with ADAMSaveAnyArgs with ParquetArgs { - @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe from, in interleaved FASTQ format.", index = 0) + @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe fragments from (e.g. interleaved FASTQ format, .ifq). If extension is not detected, Parquet is assumed.", index = 0) var inputPath: String = null - @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe to.", index = 1) + @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe alignments to (e.g. .bam, .cram, .sam). If extension is not detected, Parquet is assumed.", index = 1) var outputPath: String = null @Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT as single file.") @@ -146,7 +150,7 @@ class Bowtie(protected val args: BowtieArgs) extends BDGSparkCommand[BowtieArgs] def run(sc: SparkContext) { val fragments = sc.loadFragments(args.inputPath, stringency = stringency) - val alignments = new BowtieFn(args).apply(fragments) + val alignments = new BowtieFn(args, sc).apply(fragments) alignments.save(args) } } diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala index a6313637..27e00966 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala @@ -18,29 +18,41 @@ package org.bdgenomics.cannoli.cli import htsjdk.samtools.ValidationStringency +import org.apache.hadoop.fs.{ FileSystem, Path } import org.apache.spark.SparkContext import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.fragment.{ FragmentRDD, InterleavedFASTQInFormatter } import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, AnySAMOutFormatter } +import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.utils.cli._ import 
org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } +import scala.collection.JavaConversions._ /** * Bowtie 2 function arguments. */ class Bowtie2FnArgs extends Args4jBase { - @Args4jOption(required = false, name = "-bowtie2_path", usage = "Path to the Bowtie 2 executable. Defaults to bowtie2.") - var bowtie2Path: String = "bowtie2" + @Args4jOption(required = false, name = "-executable", usage = "Path to the Bowtie 2 executable. Defaults to bowtie2.") + var executable: String = "bowtie2" - @Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. Defaults to quay.io/biocontainers/bowtie2:2.3.4--py27pl5.22.0_0.") - var dockerImage: String = "quay.io/biocontainers/bowtie2:2.3.4--py27pl5.22.0_0" + @Args4jOption(required = false, name = "-image", usage = "Container image to use. Defaults to quay.io/biocontainers/bowtie2:2.3.4--py27pl5.22.0_0.") + var image: String = "quay.io/biocontainers/bowtie2:2.3.4--py27pl5.22.0_0" - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Bowtie 2. If false, uses the Bowtie 2 executable path.") + @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") + var sudo: Boolean = false + + @Args4jOption(required = false, name = "-add_files", usage = "If true, use the SparkFiles mechanism to distribute files to executors.") + var addFiles: Boolean = false + + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Bowtie 2.") var useDocker: Boolean = false - @Args4jOption(required = true, name = "-bowtie2_index", usage = "Basename of the index for the reference genome, e.g. in bowtie2 [options]* -x .") + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch Bowtie 2.") + var useSingularity: Boolean = false + + @Args4jOption(required = true, name = "-index", usage = "Basename of the index for the reference genome, e.g. 
in bowtie2 [options]* -x .") var indexPath: String = null } @@ -49,56 +61,48 @@ class Bowtie2FnArgs extends Args4jBase { * for use in cannoli-shell or notebooks. * * @param args Bowtie 2 function arguments. - * @param files Files to make locally available to the commands being run. - * @param environment A map containing environment variable/value pairs to set - * in the environment for the newly created process. + * @param sc Spark context. */ class Bowtie2Fn( val args: Bowtie2FnArgs, - val files: Seq[String], - val environment: Map[String, String]) extends Function1[FragmentRDD, AlignmentRecordRDD] with Logging { + val sc: SparkContext) extends Function1[FragmentRDD, AlignmentRecordRDD] with Logging { - /** - * @param args Bowtie 2 function arguments. - */ - def this(args: Bowtie2FnArgs) = this(args, Seq.empty, Map.empty) + override def apply(fragments: FragmentRDD): AlignmentRecordRDD = { + def root(): String = { + val path = new Path(args.indexPath) + val fs = path.getFileSystem(sc.hadoopConfiguration) + Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path).getParent()).toString + } - /** - * @param args Bowtie 2 function arguments. - * @param files Files to make locally available to the commands being run. 
- */ - def this(args: Bowtie2FnArgs, files: Seq[String]) = this(args, files, Map.empty) + val builder = CommandBuilders.create(args.useDocker, args.useSingularity) + .setExecutable(args.executable) + .add("-x") + .add(if (args.addFiles) "$0" else args.indexPath) + .add("--interleaved") + .add("-") - override def apply(fragments: FragmentRDD): AlignmentRecordRDD = { + if (args.addFiles) { + builder.addFile(args.indexPath) + // todo: add all index files matching *.bt2 + } - val bowtie2Command = if (args.useDocker) { - Seq("docker", - "run", - "--interactive", - "--rm", - args.dockerImage, - "bowtie2", - "-x", - args.indexPath, - "--interleaved", - "-" - ) - } else { - Seq(args.bowtie2Path, - "-x", - args.indexPath, - "--interleaved", - "-" - ) + if (args.useDocker || args.useSingularity) { + builder + .setImage(args.image) + .setSudo(args.sudo) + .addMount(if (args.addFiles) "$root" else root()) } - log.info("Piping {} to bowtie2 with command: {} files: {} environment: {}", - Array(fragments, bowtie2Command, files, environment)) + log.info("Piping {} to bowtie2 with command: {} files: {}", + Array(fragments, builder.build(), builder.getFiles())) implicit val tFormatter = InterleavedFASTQInFormatter implicit val uFormatter = new AnySAMOutFormatter - fragments.pipe(bowtie2Command, files, environment) + fragments.pipe( + cmd = builder.build(), + files = builder.getFiles() + ) } } @@ -115,10 +119,10 @@ object Bowtie2 extends BDGCommandCompanion { * Bowtie 2 command line arguments. */ class Bowtie2Args extends Bowtie2FnArgs with ADAMSaveAnyArgs with ParquetArgs { - @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe from, in interleaved FASTQ format.", index = 0) + @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe fragments from (e.g. interleaved FASTQ format, .ifq). 
If extension is not detected, Parquet is assumed.", index = 0) var inputPath: String = null - @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe to.", index = 1) + @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe alignments to (e.g. .bam, .cram, .sam). If extension is not detected, Parquet is assumed.", index = 1) var outputPath: String = null @Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT as single file.") @@ -146,7 +150,7 @@ class Bowtie2(protected val args: Bowtie2Args) extends BDGSparkCommand[Bowtie2Ar def run(sc: SparkContext) { val fragments = sc.loadFragments(args.inputPath, stringency = stringency) - val alignments = new Bowtie2Fn(args).apply(fragments) + val alignments = new Bowtie2Fn(args, sc).apply(fragments) alignments.save(args) } } diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala index 734e9a3e..fad75174 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala @@ -25,10 +25,12 @@ import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.fragment.{ FragmentRDD, InterleavedFASTQInFormatter } import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, AnySAMOutFormatter } +import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.cannoli.util.QuerynameGrouper import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } +import scala.collection.JavaConversions._ /** * Bwa function arguments. @@ -37,23 +39,26 @@ class BwaFnArgs extends Args4jBase { @Argument(required = true, metaVar = "SAMPLE", usage = "Sample ID.", index = 2) var sample: String = null - @Args4jOption(required = true, name = "-index", usage = "Path to the bwa index to be searched, e.g. 
in bwa [options]* ...") + @Args4jOption(required = true, name = "-index", usage = "Path to the BWA index to be searched, e.g. in bwa [options]* .") var indexPath: String = null - @Args4jOption(required = false, name = "-bwa_path", usage = "Path to the BWA executable. Defaults to bwa.") - var bwaPath: String = "bwa" + @Args4jOption(required = false, name = "-executable", usage = "Path to the BWA executable. Defaults to bwa.") + var executable: String = "bwa" - @Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. Defaults to quay.io/biocontainers/bwa:0.7.17--pl5.22.0_0.") - var dockerImage: String = "quay.io/biocontainers/bwa:0.7.17--pl5.22.0_0" + @Args4jOption(required = false, name = "-image", usage = "Container image to use. Defaults to quay.io/biocontainers/bwa:0.7.17--pl5.22.0_0.") + var image: String = "quay.io/biocontainers/bwa:0.7.17--pl5.22.0_0" - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch BWA. If false, uses the BWA executable path.") - var useDocker: Boolean = false + @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") + var sudo: Boolean = false + + @Args4jOption(required = false, name = "-add_files", usage = "If true, use the SparkFiles mechanism to distribute files to executors.") + var addFiles: Boolean = false - @Args4jOption(required = false, name = "-docker_cmd", usage = "The docker command to run. 
Defaults to 'docker'.") - var dockerCmd: String = "docker" + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch BWA.") + var useDocker: Boolean = false - @Args4jOption(required = false, name = "-add_indices", usage = "Adds index files via SparkFiles mechanism.") - var addIndices: Boolean = false + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch BWA.") + var useSingularity: Boolean = false } /** @@ -61,33 +66,21 @@ class BwaFnArgs extends Args4jBase { * for use in cannoli-shell or notebooks. * * @param args Bwa function arguments. - * @param files Files to make locally available to the commands being run. - * @param environment A map containing environment variable/value pairs to set - * in the environment for the newly created process. * @param sc Spark context. */ class BwaFn( val args: BwaFnArgs, - val files: Seq[String], - val environment: Map[String, String], val sc: SparkContext) extends Function1[FragmentRDD, AlignmentRecordRDD] with Logging { - /** - * @param args Bwa function arguments. - * @param sc Spark context. - */ - def this(args: BwaFnArgs, sc: SparkContext) = this(args, Seq.empty, Map.empty, sc) - - /** - * @param args Bwa function arguments. - * @param files Files to make locally available to the commands being run. - * @param sc Spark context. 
- */ - def this(args: BwaFnArgs, files: Seq[String], sc: SparkContext) = this(args, files, Map.empty, sc) - override def apply(fragments: FragmentRDD): AlignmentRecordRDD = { val sample = args.sample + def root(): String = { + val path = new Path(args.indexPath) + val fs = path.getFileSystem(sc.hadoopConfiguration) + Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path).getParent()).toString + } + def getIndexPaths(fastaPath: String): Seq[String] = { val requiredExtensions = Seq("", ".amb", @@ -126,49 +119,38 @@ class BwaFn( pathsWithScheme ++ optionalPathsWithScheme } - val (filesToAdd, bwaCommand) = if (args.useDocker) { - val (mountpoint, indexPath, filesToMount) = if (args.addIndices) { - ("$root", "$0", getIndexPaths(args.indexPath)) - } else { - (Path.getPathWithoutSchemeAndAuthority(new Path(args.indexPath).getParent()).toString, - args.indexPath, - Seq.empty) - } - - (filesToMount, Seq(args.dockerCmd, - "-v", "%s:%s".format(mountpoint, mountpoint), - "run", - "--rm", - args.dockerImage, - "mem", - "-t", "1", - "-R", s"@RG\\tID:${sample}\\tLB:${sample}\\tPL:ILLUMINA\\tPU:0\\tSM:${sample}", - "-p", - indexPath, - "-")) - } else { - val (indexPath, filesToMount) = if (args.addIndices) { - ("$0", getIndexPaths(args.indexPath)) - } else { - (args.indexPath, Seq.empty) - } + var builder = CommandBuilders.create(args.useDocker, args.useSingularity) + .setExecutable(args.executable) + .add("mem") + .add("-t") + .add("1") + .add("-R") + .add(s"@RG\\tID:${sample}\\tLB:${sample}\\tPL:ILLUMINA\\tPU:0\\tSM:${sample}") + .add("-p") + .add(if (args.addFiles) "$0" else args.indexPath) + .add("-") + + if (args.addFiles) { + getIndexPaths(args.indexPath).foreach(builder.addFile(_)) + } - (filesToMount, Seq(args.bwaPath, - "mem", - "-t", "1", - "-R", s"@RG\\tID:${sample}\\tLB:${sample}\\tPL:ILLUMINA\\tPU:0\\tSM:${sample}", - "-p", - args.indexPath, - "-")) + if (args.useDocker || args.useSingularity) { + builder + .setImage(args.image) + .setSudo(args.sudo) + 
.addMount(if (args.addFiles) "$root" else root()) } - log.info("Piping {} to bwa with command: {} files: {} environment: {}", - Array(fragments, bwaCommand, files, environment)) + log.info("Piping {} to bwa with command: {} files: {}", + Array(fragments, builder.build(), builder.getFiles())) implicit val tFormatter = InterleavedFASTQInFormatter implicit val uFormatter = new AnySAMOutFormatter - fragments.pipe(bwaCommand, files, environment) + fragments.pipe( + cmd = builder.build(), + files = builder.getFiles() + ) } } diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala index c3c91e7c..62d52ea2 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala @@ -27,6 +27,7 @@ import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, BAMInFormatter } import org.bdgenomics.adam.rdd.variant.{ VariantContextRDD, VCFOutFormatter } +import org.bdgenomics.adam.util.FileExtensions._ import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging @@ -138,7 +139,7 @@ object Freebayes extends BDGCommandCompanion { * Freebayes command line arguments. */ class FreebayesArgs extends FreebayesFnArgs with ADAMSaveAnyArgs with ParquetArgs { - @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe alignment records from (e.g. .bam, .cram, .sam). If extension is not detected, Parquet is assumed.", index = 0) + @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe alignment records from (e.g. .bam, .cram, .sam). If extension is not detected, Parquet is assumed.", index = 0) var inputPath: String = null @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe genotypes to (e.g. .vcf, .vcf.gz, .vcf.bgz). 
If extension is not detected, Parquet is assumed.", index = 1) @@ -167,13 +168,6 @@ class Freebayes(protected val args: FreebayesArgs) extends BDGSparkCommand[Freeb val companion = Freebayes val stringency: ValidationStringency = ValidationStringency.valueOf(args.stringency) - // todo: org.bdgenomics.adam.util.FileExtensions is inaccessible - def isVcfExt(pathName: String): Boolean = { - pathName.endsWith(".vcf") || - pathName.endsWith(".vcf.gz") || - pathName.endsWith(".vcf.bgz") - } - def run(sc: SparkContext) { val alignments = sc.loadAlignments(args.inputPath, stringency = stringency) val variantContexts = new FreebayesFn(args, sc).apply(alignments) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala index 9152e584..e222910b 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala @@ -18,31 +18,44 @@ package org.bdgenomics.cannoli.cli import htsjdk.samtools.ValidationStringency +import org.apache.hadoop.fs.{ FileSystem, Path } import org.apache.spark.SparkContext import org.bdgenomics.adam.models.VariantContext import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, BAMInFormatter } import org.bdgenomics.adam.rdd.variant.{ VariantContextRDD, VCFOutFormatter } +import org.bdgenomics.adam.util.FileExtensions._ +import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } +import scala.collection.JavaConversions._ /** * Samtools function arguments. */ class SamtoolsFnArgs extends Args4jBase { - @Args4jOption(required = false, name = "-samtools_path", usage = "Path to the samtools executable. 
Defaults to samtools.") - var samtoolsPath: String = "samtools" + @Args4jOption(required = false, name = "-executable", usage = "Path to the samtools executable. Defaults to samtools.") + var executable: String = "samtools" - @Args4jOption(required = true, name = "-samtools_reference", usage = "Reference sequence for analysis. An index file (.fai) will be created if none exists.") - var referencePath: String = null + @Args4jOption(required = false, name = "-docker_image", usage = "Container image to use. Defaults to quay.io/biocontainers/samtools:1.6--0.") + var image: String = "quay.io/biocontainers/samtools:1.6--0" + + @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") + var sudo: Boolean = false - @Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. Defaults to quay.io/biocontainers/samtools:1.6--0.") - var dockerImage: String = "quay.io/biocontainers/samtools:1.6--0" + @Args4jOption(required = false, name = "-add_files", usage = "If true, use the SparkFiles mechanism to distribute files to executors.") + var addFiles: Boolean = false - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch samtools. If false, uses the samtools executable path.") + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch samtools.") var useDocker: Boolean = false + + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch samtools.") + var useSingularity: Boolean = false + + @Args4jOption(required = true, name = "-reference", usage = "Reference sequence for analysis. An index file (.fai) will be created if none exists.") + var referencePath: String = null } /** @@ -50,62 +63,50 @@ class SamtoolsFnArgs extends Args4jBase { * for use in cannoli-shell or notebooks. * * @param args Samtools function arguments. - * @param files Files to make locally available to the commands being run. 
- * @param environment A map containing environment variable/value pairs to set - * in the environment for the newly created process. * @param sc Spark context. */ class SamtoolsFn( val args: SamtoolsFnArgs, - val files: Seq[String], - val environment: Map[String, String], val sc: SparkContext) extends Function1[AlignmentRecordRDD, VariantContextRDD] with Logging { - /** - * @param args Samtools function arguments. - * @param sc Spark context. - */ - def this(args: SamtoolsFnArgs, sc: SparkContext) = this(args, Seq.empty, Map.empty, sc) - - /** - * @param args Samtools function arguments. - * @param files Files to make locally available to the commands being run. - * @param sc Spark context. - */ - def this(args: SamtoolsFnArgs, files: Seq[String], sc: SparkContext) = this(args, files, Map.empty, sc) - override def apply(alignments: AlignmentRecordRDD): VariantContextRDD = { + def root(): String = { + val path = new Path(args.referencePath) + val fs = path.getFileSystem(sc.hadoopConfiguration) + Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path).getParent()).toString + } - val samtoolsCommand = if (args.useDocker) { - Seq("docker", - "run", - "--interactive", - "--rm", - args.dockerImage, - "samtools", - "mpileup", - "-", - "--reference", - args.referencePath, - "-v", - "-u") - } else { - Seq(args.samtoolsPath, - "mpileup", - "-", - "--reference", - args.referencePath, - "-v", - "-u") + val builder = CommandBuilders.create(args.useDocker, args.useSingularity) + .setExecutable(args.executable) + .add("mpileup") + .add("-") + .add("--reference") + .add(if (args.addFiles) "$0" else args.referencePath) + .add("-v") + .add("-u") + + if (args.addFiles) { + builder.addFile(args.referencePath) + builder.addFile(args.referencePath + ".fai") + } + + if (args.useDocker || args.useSingularity) { + builder + .setImage(args.image) + .setSudo(args.sudo) + .addMount(if (args.addFiles) "$root" else root()) } - log.info("Piping {} to samtools with command: {} files: {} 
environment: {}", - Array(alignments, samtoolsCommand, files, environment)) + log.info("Piping {} to samtools with command: {} files: {}", + Array(alignments, builder.build(), builder.getFiles())) implicit val tFormatter = BAMInFormatter implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration) - alignments.pipe[VariantContext, VariantContextRDD, BAMInFormatter](samtoolsCommand, files, environment) + alignments.pipe[VariantContext, VariantContextRDD, BAMInFormatter]( + cmd = builder.build(), + files = builder.getFiles() + ) } } @@ -122,10 +123,10 @@ object Samtools extends BDGCommandCompanion { * Samtools command line arguments. */ class SamtoolsArgs extends SamtoolsFnArgs with ADAMSaveAnyArgs with ParquetArgs { - @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe from.", index = 0) + @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe alignment records from (e.g. .bam, .cram, .sam). If extension is not detected, Parquet is assumed.", index = 0) var inputPath: String = null - @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe to, in VCF format.", index = 1) + @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe genotypes to (e.g. .vcf, .vcf.gz, .vcf.bgz). 
If extension is not detected, Parquet is assumed.", index = 1) var outputPath: String = null @Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT as single file.") @@ -154,6 +155,17 @@ class Samtools(protected val args: SamtoolsArgs) extends BDGSparkCommand[Samtool def run(sc: SparkContext) { val alignments = sc.loadAlignments(args.inputPath, stringency = stringency) val variantContexts = new SamtoolsFn(args, sc).apply(alignments) - variantContexts.saveAsVcf(args, stringency) + + if (isVcfExt(args.outputPath)) { + variantContexts.saveAsVcf( + args.outputPath, + asSingleFile = args.asSingleFile, + deferMerging = args.deferMerging, + disableFastConcat = args.disableFastConcat, + stringency + ) + } else { + variantContexts.toGenotypes.saveAsParquet(args) + } } } diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala index 1875d726..0149bdca 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala @@ -26,25 +26,33 @@ import org.bdgenomics.adam.rdd.variant.{ VCFInFormatter, VCFOutFormatter } +import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } +import scala.collection.JavaConversions._ /** * SnpEff function arguments. */ class SnpEffFnArgs extends Args4jBase { - @Args4jOption(required = false, name = "-database", usage = "SnpEff database name. Defaults to GRCh38.86.") - var snpEffDatabase: String = "GRCh38.86" + @Args4jOption(required = false, name = "-executable", usage = "Path to the SnpEff executable. Defaults to snpEff.") + var executable: String = "snpEff" - @Args4jOption(required = false, name = "-snpeff_path", usage = "Path to the SnpEff executable. 
Defaults to snpEff.") - var snpEffPath: String = "snpEff" + @Args4jOption(required = false, name = "-image", usage = "Container image to use. Defaults to quay.io/biocontainers/snpeff:4.3.1t--0.") + var image: String = "quay.io/biocontainers/snpeff:4.3.1t--0" - @Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. Defaults to quay.io/biocontainers/snpeff:4.3.1t--0.") - var dockerImage: String = "quay.io/biocontainers/snpeff:4.3.1t--0" + @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") + var sudo: Boolean = false - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch SnpEff. If false, uses the SnpEff executable path.") + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch SnpEff.") var useDocker: Boolean = false + + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch SnpEff.") + var useSingularity: Boolean = false + + @Args4jOption(required = false, name = "-database", usage = "SnpEff database name. Defaults to GRCh38.86.") + var database: String = "GRCh38.86" } /** @@ -52,53 +60,35 @@ class SnpEffFnArgs extends Args4jBase { * for use in cannoli-shell or notebooks. * * @param args SnpEff function arguments. - * @param files Files to make locally available to the commands being run. - * @param environment A map containing environment variable/value pairs to set - * in the environment for the newly created process. * @param sc Spark context. */ class SnpEffFn( val args: SnpEffFnArgs, - val files: Seq[String], - val environment: Map[String, String], val sc: SparkContext) extends Function1[VariantContextRDD, VariantContextRDD] with Logging { - /** - * @param args SnpEff function arguments. - * @param sc Spark context. - */ - def this(args: SnpEffFnArgs, sc: SparkContext) = this(args, Seq.empty, Map.empty, sc) - - /** - * @param args SnpEff function arguments. 
- * @param files Files to make locally available to the commands being run. - * @param sc Spark context. - */ - def this(args: SnpEffFnArgs, files: Seq[String], sc: SparkContext) = this(args, files, Map.empty, sc) - override def apply(variantContexts: VariantContextRDD): VariantContextRDD = { - val snpEffCommand = if (args.useDocker) { - Seq("docker", - "run", - "--rm", - args.dockerImage, - "snpEff", - "-download", - args.snpEffDatabase) - } else { - Seq(args.snpEffPath, - "-download", - args.snpEffDatabase) + var builder = CommandBuilders.create(args.useDocker, args.useSingularity) + .setExecutable(args.executable) + .add("-download") + .add(args.database) + + if (args.useDocker || args.useSingularity) { + builder + .setImage(args.image) + .setSudo(args.sudo) } - log.info("Piping {} to snpEff with command: {} files: {} environment: {}", - Array(variantContexts, snpEffCommand, files, environment)) + log.info("Piping {} to snpEff with command: {} files: {}", + Array(variantContexts, builder.build(), builder.getFiles())) implicit val tFormatter = VCFInFormatter implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration) - variantContexts.pipe(snpEffCommand, files, environment) + variantContexts.pipe( + cmd = builder.build(), + files = builder.getFiles() + ) } } diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala index cafb0571..cc5e4e2d 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala @@ -18,6 +18,7 @@ package org.bdgenomics.cannoli.cli import htsjdk.samtools.ValidationStringency +import org.apache.hadoop.fs.{ FileSystem, Path } import org.apache.spark.SparkContext import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs @@ -26,25 +27,39 @@ import org.bdgenomics.adam.rdd.variant.{ VCFInFormatter, VCFOutFormatter } +import org.bdgenomics.cannoli.builder.CommandBuilders 
import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } +import scala.collection.JavaConversions._ /** * Vt function arguments. */ class VtFnArgs extends Args4jBase { - @Args4jOption(required = false, name = "-vt_path", usage = "Path to the vt executable. Defaults to vt.") - var vtPath: String = "vt" + @Args4jOption(required = false, name = "-executable", usage = "Path to the vt executable. Defaults to vt.") + var executable: String = "vt" - @Args4jOption(required = true, name = "-vt_reference", usage = "Reference sequence for analysis.") - var referencePath: String = null + @Args4jOption(required = false, name = "-image", usage = "Container image to use. Defaults to heuermh/vt.") + var image: String = "heuermh/vt" + + @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") + var sudo: Boolean = false - @Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. Defaults to heuermh/vt.") - var dockerImage: String = "heuermh/vt" + @Args4jOption(required = false, name = "-add_files", usage = "If true, use the SparkFiles mechanism to distribute files to executors.") + var addFiles: Boolean = false - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch vt. If false, uses the vt executable path.") + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Freebayes.") var useDocker: Boolean = false + + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch Freebayes.") + var useSingularity: Boolean = false + + @Args4jOption(required = true, name = "-reference", usage = "Reference sequence for analysis.") + var referencePath: String = null + + @Args4jOption(required = false, name = "-window", usage = "Window size for local sorting of variants. 
Defaults to 10000.") + var window: Int = _ } /** @@ -52,58 +67,47 @@ class VtFnArgs extends Args4jBase { * for use in cannoli-shell or notebooks. * * @param args Vt function arguments. - * @param files Files to make locally available to the commands being run. - * @param environment A map containing environment variable/value pairs to set - * in the environment for the newly created process. * @param sc Spark context. */ class VtFn( val args: VtFnArgs, - val files: Seq[String], - val environment: Map[String, String], val sc: SparkContext) extends Function1[VariantContextRDD, VariantContextRDD] with Logging { - /** - * @param args Vt function arguments. - * @param sc Spark context. - */ - def this(args: VtFnArgs, sc: SparkContext) = this(args, Seq.empty, Map.empty, sc) + override def apply(variantContexts: VariantContextRDD): VariantContextRDD = { + def root(): String = { + val path = new Path(args.referencePath) + val fs = path.getFileSystem(sc.hadoopConfiguration) + Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path).getParent()).toString + } - /** - * @param args Vt function arguments. - * @param files Files to make locally available to the commands being run. - * @param sc Spark context. 
- */ - def this(args: VtFnArgs, files: Seq[String], sc: SparkContext) = this(args, files, Map.empty, sc) + var builder = CommandBuilders.create(args.useDocker, args.useSingularity) + .setExecutable(args.executable) + .add("normalize") + .add("-") + .add("-r") + .add(if (args.addFiles) "$0" else args.referencePath) - override def apply(variantContexts: VariantContextRDD): VariantContextRDD = { + Option(args.window).foreach(i => builder.add("-w").add(i.toString)) + + if (args.addFiles) builder.addFile(args.referencePath) - val vtCommand = if (args.useDocker) { - Seq("docker", - "run", - "--interactive", - "--rm", - args.dockerImage, - "vt", - "normalize", - "-", - "-r", - args.referencePath) - } else { - Seq(args.vtPath, - "normalize", - "-", - "-r", - args.referencePath) + if (args.useDocker || args.useSingularity) { + builder + .setImage(args.image) + .setSudo(args.sudo) + .addMount(if (args.addFiles) "$root" else root()) } - log.info("Piping {} to vt with command: {} files: {} environment: {}", - Array(variantContexts, vtCommand, files, environment)) + log.info("Piping {} to vt with command: {} files: {}", + Array(variantContexts, builder.build(), builder.getFiles())) implicit val tFormatter = VCFInFormatter implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration) - variantContexts.pipe(vtCommand, files, environment) + variantContexts.pipe( + cmd = builder.build(), + files = builder.getFiles() + ) } } From e892f8801055f3466f2ebe177c510b1c35605b08 Mon Sep 17 00:00:00 2001 From: Michael Heuer Date: Thu, 1 Mar 2018 10:47:25 -0600 Subject: [PATCH 11/18] Minor usage doc fix. 
--- cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala index cc5e4e2d..19afc354 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala @@ -49,10 +49,10 @@ class VtFnArgs extends Args4jBase { @Args4jOption(required = false, name = "-add_files", usage = "If true, use the SparkFiles mechanism to distribute files to executors.") var addFiles: Boolean = false - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Freebayes.") + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch vt.") var useDocker: Boolean = false - @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch Freebayes.") + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch vt.") var useSingularity: Boolean = false @Args4jOption(required = true, name = "-reference", usage = "Reference sequence for analysis.") From 8d82ce803b487292d350c4f7b5880d07a3ae0794 Mon Sep 17 00:00:00 2001 From: Michael Heuer Date: Fri, 2 Mar 2018 11:42:01 -0600 Subject: [PATCH 12/18] Adding CannoliFn abstract class to reduce code duplication. 
--- .../org/bdgenomics/cannoli/cli/Bcftools.scala | 9 +- .../org/bdgenomics/cannoli/cli/Bedtools.scala | 10 +- .../org/bdgenomics/cannoli/cli/Bowtie.scala | 19 ++-- .../org/bdgenomics/cannoli/cli/Bowtie2.scala | 13 +-- .../org/bdgenomics/cannoli/cli/Bwa.scala | 10 +- .../bdgenomics/cannoli/cli/CannoliFn.scala | 92 +++++++++++++++++++ .../bdgenomics/cannoli/cli/Freebayes.scala | 11 +-- .../org/bdgenomics/cannoli/cli/Samtools.scala | 9 +- .../org/bdgenomics/cannoli/cli/SnpEff.scala | 2 +- .../scala/org/bdgenomics/cannoli/cli/Vt.scala | 9 +- 10 files changed, 119 insertions(+), 65 deletions(-) create mode 100644 cli/src/main/scala/org/bdgenomics/cannoli/cli/CannoliFn.scala diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala index fa5ec665..c5c2f413 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala @@ -68,14 +68,9 @@ class BcftoolsFnArgs extends Args4jBase { */ class BcftoolsFn( val args: BcftoolsFnArgs, - val sc: SparkContext) extends Function1[VariantContextRDD, VariantContextRDD] with Logging { + sc: SparkContext) extends CannoliFn[VariantContextRDD, VariantContextRDD](sc) with Logging { override def apply(variantContexts: VariantContextRDD): VariantContextRDD = { - def root(): String = { - val path = new Path(args.referencePath) - val fs = path.getFileSystem(sc.hadoopConfiguration) - Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path).getParent()).toString - } val builder = CommandBuilders.create(args.useDocker, args.useSingularity) .setExecutable(args.executable) @@ -92,7 +87,7 @@ class BcftoolsFn( builder .setImage(args.image) .setSudo(args.sudo) - .addMount(if (args.addFiles) "$root" else root()) + .addMount(if (args.addFiles) "$root" else root(args.referencePath)) } log.info("Piping {} to bcftools with command: {} files: {}", diff --git 
a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala index 160b8fde..29dfc76a 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala @@ -82,7 +82,7 @@ class BedtoolsFnArgs extends Args4jBase { */ class BedtoolsFn( val args: BedtoolsFnArgs, - val sc: SparkContext) extends Function1[FeatureRDD, FeatureRDD] with Logging { + sc: SparkContext) extends CannoliFn[FeatureRDD, FeatureRDD](sc) with Logging { override def apply(features: FeatureRDD): FeatureRDD = { val optA = Option(args.a) @@ -92,12 +92,6 @@ class BedtoolsFn( val file = List(optA, optB).flatten.get(0) - def root(): String = { - val path = new Path(file) - val fs = path.getFileSystem(sc.hadoopConfiguration) - Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path).getParent()).toString - } - var builder = CommandBuilders.create(args.useDocker, args.useSingularity) .setExecutable(args.executable) .add("intersect") @@ -113,7 +107,7 @@ class BedtoolsFn( builder .setImage(args.image) .setSudo(args.sudo) - .addMount(if (args.addFiles) "$root" else root()) + .addMount(if (args.addFiles) "$root" else root(file)) } log.info("Piping {} to bedtools with command: {} files: {}", diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala index 890050e7..7d2cdec1 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala @@ -18,7 +18,7 @@ package org.bdgenomics.cannoli.cli import htsjdk.samtools.ValidationStringency -import org.apache.hadoop.fs.{ FileSystem, Path } +import org.apache.hadoop.fs.{ FileSystem, Path, PathFilter } import org.apache.spark.SparkContext import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs @@ -37,7 +37,7 @@ class BowtieFnArgs extends Args4jBase { 
@Args4jOption(required = false, name = "-executable", usage = "Path to the Bowtie executable. Defaults to bowtie.") var executable: String = "bowtie" - @Args4jOption(required = false, name = "-image", usage = "Container image to use. Defaults to quay.io/biocontainers/bowtie:1.2.1.1--py27pl5.22.0_0") + @Args4jOption(required = false, name = "-image", usage = "Container image to use. Defaults to quay.io/biocontainers/bowtie:1.2.1.1--py27pl5.22.0_0.") var image: String = "quay.io/biocontainers/bowtie:1.2.1.1--py27pl5.22.0_0" @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") @@ -52,7 +52,7 @@ class BowtieFnArgs extends Args4jBase { @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch Bowtie.") var useSingularity: Boolean = false - @Args4jOption(required = true, name = "-index", usage = "Basename of the bowtie index to be searched, e.g. in bowtie [options]* ...") + @Args4jOption(required = true, name = "-index", usage = "Basename of the bowtie index to be searched, e.g. 
in bowtie [options]* .") var indexPath: String = null } @@ -65,14 +65,9 @@ class BowtieFnArgs extends Args4jBase { */ class BowtieFn( val args: BowtieFnArgs, - val sc: SparkContext) extends Function1[FragmentRDD, AlignmentRecordRDD] with Logging { + sc: SparkContext) extends CannoliFn[FragmentRDD, AlignmentRecordRDD](sc) with Logging { override def apply(fragments: FragmentRDD): AlignmentRecordRDD = { - def root(): String = { - val path = new Path(args.indexPath) - val fs = path.getFileSystem(sc.hadoopConfiguration) - Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path).getParent()).toString - } val builder = CommandBuilders.create(args.useDocker, args.useSingularity) .setExecutable(args.executable) @@ -82,15 +77,17 @@ class BowtieFn( .add("-") if (args.addFiles) { + // add args.indexPath for "$0" builder.addFile(args.indexPath) - // todo: add all index files matching *.ewbt + // add bowtie indexes via globbed index path + builder.addFiles(files(args.indexPath + "*.ebwt")) } if (args.useDocker || args.useSingularity) { builder .setImage(args.image) .setSudo(args.sudo) - .addMount(if (args.addFiles) "$root" else root()) + .addMount(if (args.addFiles) "$root" else root(args.indexPath)) } log.info("Piping {} to bowtie with command: {} files: {}", diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala index 27e00966..59507853 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala @@ -65,14 +65,9 @@ class Bowtie2FnArgs extends Args4jBase { */ class Bowtie2Fn( val args: Bowtie2FnArgs, - val sc: SparkContext) extends Function1[FragmentRDD, AlignmentRecordRDD] with Logging { + sc: SparkContext) extends CannoliFn[FragmentRDD, AlignmentRecordRDD](sc) with Logging { override def apply(fragments: FragmentRDD): AlignmentRecordRDD = { - def root(): String = { - val path = new Path(args.indexPath) - val fs = 
path.getFileSystem(sc.hadoopConfiguration) - Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path).getParent()).toString - } val builder = CommandBuilders.create(args.useDocker, args.useSingularity) .setExecutable(args.executable) @@ -82,15 +77,17 @@ class Bowtie2Fn( .add("-") if (args.addFiles) { + // add args.indexPath for "$0" builder.addFile(args.indexPath) - // todo: add all index files matching *.bt2 + // add bowtie2 indexes via globbed index path + builder.addFiles(files(args.indexPath + "*.bt2")) } if (args.useDocker || args.useSingularity) { builder .setImage(args.image) .setSudo(args.sudo) - .addMount(if (args.addFiles) "$root" else root()) + .addMount(if (args.addFiles) "$root" else root(args.indexPath)) } log.info("Piping {} to bowtie2 with command: {} files: {}", diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala index fad75174..f665dea5 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala @@ -70,17 +70,11 @@ class BwaFnArgs extends Args4jBase { */ class BwaFn( val args: BwaFnArgs, - val sc: SparkContext) extends Function1[FragmentRDD, AlignmentRecordRDD] with Logging { + sc: SparkContext) extends CannoliFn[FragmentRDD, AlignmentRecordRDD](sc) with Logging { override def apply(fragments: FragmentRDD): AlignmentRecordRDD = { val sample = args.sample - def root(): String = { - val path = new Path(args.indexPath) - val fs = path.getFileSystem(sc.hadoopConfiguration) - Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path).getParent()).toString - } - def getIndexPaths(fastaPath: String): Seq[String] = { val requiredExtensions = Seq("", ".amb", @@ -138,7 +132,7 @@ class BwaFn( builder .setImage(args.image) .setSudo(args.sudo) - .addMount(if (args.addFiles) "$root" else root()) + .addMount(if (args.addFiles) "$root" else root(args.indexPath)) } log.info("Piping {} to bwa with command: {} files: 
{}", diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/CannoliFn.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/CannoliFn.scala new file mode 100644 index 00000000..90025558 --- /dev/null +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/CannoliFn.scala @@ -0,0 +1,92 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.cannoli.cli + +import java.io.FileNotFoundException +import org.apache.hadoop.fs.{ Path, PathFilter } +import org.apache.spark.SparkContext + +/** + * Cannoli function. + * + * @param sc Spark context. + * @tparam X Cannoli function argument type parameter. + * @tparam Y Cannoli function result type parameter. 
+ */ +abstract class CannoliFn[X, Y](val sc: SparkContext) extends Function1[X, Y] { + + def root(pathName: String): String = { + val path = new Path(pathName) + + // get the underlying fs for the file + val fs = Option(path.getFileSystem(sc.hadoopConfiguration)).getOrElse( + throw new FileNotFoundException( + s"Couldn't find filesystem for ${path.toUri} with Hadoop configuration ${sc.hadoopConfiguration}" + )) + + Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path).getParent()).toString + } + + def files(pathName: String): Seq[String] = { + files(pathName, new PathFilter() { + def accept(path: Path): Boolean = { + return true + } + }) + } + + def files(pathName: String, filter: PathFilter): Seq[String] = { + val path = new Path(pathName) + + // get the underlying fs for the file + val fs = Option(path.getFileSystem(sc.hadoopConfiguration)).getOrElse( + throw new FileNotFoundException( + s"Couldn't find filesystem for ${path.toUri} with Hadoop configuration ${sc.hadoopConfiguration}" + )) + + // elaborate out the path; this returns FileStatuses + val paths = if (fs.isDirectory(path)) { + val paths = fs.listStatus(path) + + if (paths == null || paths.isEmpty) { + throw new FileNotFoundException( + s"Couldn't find any files matching ${path.toUri}" + ) + } + fs.listStatus(path, filter) + } else { + val paths = fs.globStatus(path) + if (paths == null || paths.isEmpty) { + throw new FileNotFoundException( + s"Couldn't find any files matching ${path.toUri}" + ) + } + fs.globStatus(path, filter) + } + + // the path must match PathFilter + if (paths == null || paths.isEmpty) { + throw new FileNotFoundException( + s"Couldn't find any files matching ${path.toUri} for the requested PathFilter" + ) + } + + // map the paths returned to their paths + paths.map(_.getPath.toString) + } +} diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala index 62d52ea2..f9b93dc8 100644 --- 
a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala @@ -41,7 +41,7 @@ class FreebayesFnArgs extends Args4jBase { @Args4jOption(required = false, name = "-executable", usage = "Path to the Freebayes executable. Defaults to freebayes.") var executable: String = "freebayes" - @Args4jOption(required = false, name = "-image", usage = "Container image to use. Defaults to quay.io/biocontainers/freebayes:1.1.0.46--htslib1.6_2") + @Args4jOption(required = false, name = "-image", usage = "Container image to use. Defaults to quay.io/biocontainers/freebayes:1.1.0.46--htslib1.6_2.") var image: String = "quay.io/biocontainers/freebayes:1.1.0.46--htslib1.6_2" @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") @@ -75,14 +75,9 @@ class FreebayesFnArgs extends Args4jBase { */ class FreebayesFn( val args: FreebayesFnArgs, - val sc: SparkContext) extends Function1[AlignmentRecordRDD, VariantContextRDD] with Logging { + sc: SparkContext) extends CannoliFn[AlignmentRecordRDD, VariantContextRDD](sc) with Logging { override def apply(alignments: AlignmentRecordRDD): VariantContextRDD = { - def root(): String = { - val path = new Path(args.referencePath) - val fs = path.getFileSystem(sc.hadoopConfiguration) - Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path).getParent()).toString - } var builder = CommandBuilders.create(args.useDocker, args.useSingularity) .setExecutable(args.executable) @@ -105,7 +100,7 @@ class FreebayesFn( builder .setImage(args.image) .setSudo(args.sudo) - .addMount(if (args.addFiles) "$root" else root()) + .addMount(if (args.addFiles) "$root" else root(args.referencePath)) } log.info("Piping {} to freebayes with command: {} files: {}", diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala index e222910b..1bf5510f 100644 --- 
a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala @@ -67,14 +67,9 @@ class SamtoolsFnArgs extends Args4jBase { */ class SamtoolsFn( val args: SamtoolsFnArgs, - val sc: SparkContext) extends Function1[AlignmentRecordRDD, VariantContextRDD] with Logging { + sc: SparkContext) extends CannoliFn[AlignmentRecordRDD, VariantContextRDD](sc) with Logging { override def apply(alignments: AlignmentRecordRDD): VariantContextRDD = { - def root(): String = { - val path = new Path(args.referencePath) - val fs = path.getFileSystem(sc.hadoopConfiguration) - Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path).getParent()).toString - } val builder = CommandBuilders.create(args.useDocker, args.useSingularity) .setExecutable(args.executable) @@ -94,7 +89,7 @@ class SamtoolsFn( builder .setImage(args.image) .setSudo(args.sudo) - .addMount(if (args.addFiles) "$root" else root()) + .addMount(if (args.addFiles) "$root" else root(args.referencePath)) } log.info("Piping {} to samtools with command: {} files: {}", diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala index 0149bdca..28ee88e9 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala @@ -64,7 +64,7 @@ class SnpEffFnArgs extends Args4jBase { */ class SnpEffFn( val args: SnpEffFnArgs, - val sc: SparkContext) extends Function1[VariantContextRDD, VariantContextRDD] with Logging { + sc: SparkContext) extends CannoliFn[VariantContextRDD, VariantContextRDD](sc) with Logging { override def apply(variantContexts: VariantContextRDD): VariantContextRDD = { diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala index 19afc354..1fc037de 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala +++ 
b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala @@ -71,14 +71,9 @@ class VtFnArgs extends Args4jBase { */ class VtFn( val args: VtFnArgs, - val sc: SparkContext) extends Function1[VariantContextRDD, VariantContextRDD] with Logging { + sc: SparkContext) extends CannoliFn[VariantContextRDD, VariantContextRDD](sc) with Logging { override def apply(variantContexts: VariantContextRDD): VariantContextRDD = { - def root(): String = { - val path = new Path(args.referencePath) - val fs = path.getFileSystem(sc.hadoopConfiguration) - Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path).getParent()).toString - } var builder = CommandBuilders.create(args.useDocker, args.useSingularity) .setExecutable(args.executable) @@ -95,7 +90,7 @@ class VtFn( builder .setImage(args.image) .setSudo(args.sudo) - .addMount(if (args.addFiles) "$root" else root()) + .addMount(if (args.addFiles) "$root" else root(args.referencePath)) } log.info("Piping {} to vt with command: {} files: {}", From 151cf63bcba519f33cfdec8cc9b7693a0eb34c9b Mon Sep 17 00:00:00 2001 From: Michael Heuer Date: Sun, 4 Mar 2018 21:29:17 -0600 Subject: [PATCH 13/18] Throw file not found exception if directory is empty. 
--- .../main/scala/org/bdgenomics/cannoli/cli/CannoliFn.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/CannoliFn.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/CannoliFn.scala index 90025558..b6847989 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/CannoliFn.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/CannoliFn.scala @@ -62,10 +62,9 @@ abstract class CannoliFn[X, Y](val sc: SparkContext) extends Function1[X, Y] { // elaborate out the path; this returns FileStatuses val paths = if (fs.isDirectory(path)) { val paths = fs.listStatus(path) - - if (paths == null || paths.isEmpty) { + if (paths.isEmpty) { throw new FileNotFoundException( - s"Couldn't find any files matching ${path.toUri}" + s"Couldn't find any files matching ${path.toUri}, directory is empty" ) } fs.listStatus(path, filter) From 1ead2aefda2302039812003e30047e566f5dec83 Mon Sep 17 00:00:00 2001 From: Michael Heuer Date: Wed, 14 Mar 2018 15:24:12 -0500 Subject: [PATCH 14/18] Fix log messages. 
--- cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala | 2 +- cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala | 2 +- cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala | 2 +- cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala | 2 +- cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala | 2 +- cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala | 2 +- cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala | 2 +- cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala | 2 +- cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala index c5c2f413..c1a82705 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala @@ -91,7 +91,7 @@ class BcftoolsFn( } log.info("Piping {} to bcftools with command: {} files: {}", - Array(variantContexts, builder.build(), builder.getFiles())) + variantContexts, builder.build(), builder.getFiles()) implicit val tFormatter = VCFInFormatter implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala index 29dfc76a..3152dccb 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala @@ -111,7 +111,7 @@ class BedtoolsFn( } log.info("Piping {} to bedtools with command: {} files: {}", - Array(features, builder.build(), builder.getFiles())) + features, builder.build(), builder.getFiles()) implicit val tFormatter = BEDInFormatter implicit val uFormatter = new BEDOutFormatter diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala 
b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala index 7d2cdec1..afd64d10 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala @@ -91,7 +91,7 @@ class BowtieFn( } log.info("Piping {} to bowtie with command: {} files: {}", - Array(fragments, builder.build(), builder.getFiles())) + fragments, builder.build(), builder.getFiles()) implicit val tFormatter = InterleavedFASTQInFormatter implicit val uFormatter = new AnySAMOutFormatter diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala index 59507853..cffecb61 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala @@ -91,7 +91,7 @@ class Bowtie2Fn( } log.info("Piping {} to bowtie2 with command: {} files: {}", - Array(fragments, builder.build(), builder.getFiles())) + fragments, builder.build(), builder.getFiles()) implicit val tFormatter = InterleavedFASTQInFormatter implicit val uFormatter = new AnySAMOutFormatter diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala index f665dea5..ae6356a5 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala @@ -136,7 +136,7 @@ class BwaFn( } log.info("Piping {} to bwa with command: {} files: {}", - Array(fragments, builder.build(), builder.getFiles())) + fragments, builder.build(), builder.getFiles()) implicit val tFormatter = InterleavedFASTQInFormatter implicit val uFormatter = new AnySAMOutFormatter diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala index f9b93dc8..d6481f53 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala +++ 
b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala @@ -104,7 +104,7 @@ class FreebayesFn( } log.info("Piping {} to freebayes with command: {} files: {}", - Array(alignments, builder.build(), builder.getFiles())) + alignments, builder.build(), builder.getFiles()) val accumulator: CollectionAccumulator[VCFHeaderLine] = sc.collectionAccumulator("headerLines") diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala index 1bf5510f..d1a963a5 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala @@ -93,7 +93,7 @@ class SamtoolsFn( } log.info("Piping {} to samtools with command: {} files: {}", - Array(alignments, builder.build(), builder.getFiles())) + alignments, builder.build(), builder.getFiles()) implicit val tFormatter = BAMInFormatter implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala index 28ee88e9..0d944479 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala @@ -80,7 +80,7 @@ class SnpEffFn( } log.info("Piping {} to snpEff with command: {} files: {}", - Array(variantContexts, builder.build(), builder.getFiles())) + variantContexts, builder.build(), builder.getFiles()) implicit val tFormatter = VCFInFormatter implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala index 1fc037de..aaf4a50b 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala @@ -94,7 +94,7 @@ class VtFn( } log.info("Piping {} to vt with command: {} files: {}", - 
Array(variantContexts, builder.build(), builder.getFiles())) + variantContexts, builder.build(), builder.getFiles()) implicit val tFormatter = VCFInFormatter implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration) From 3c2da5569092c5c8e4a20529990847ce7b9b2595 Mon Sep 17 00:00:00 2001 From: Michael Heuer Date: Wed, 14 Mar 2018 15:41:53 -0500 Subject: [PATCH 15/18] Fix paths for bwa. --- cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala index ae6356a5..84bfe639 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala @@ -92,7 +92,7 @@ class BwaFn( } def optionalPath(ext: String): Option[String] = { - val path = new Path(fastaPath, ext) + val path = new Path(fastaPath + ext) val fs = path.getFileSystem(sc.hadoopConfiguration) if (fs.exists(path)) { Some(canonicalizePath(fs, path)) From a1a3ac9a8a29c3efe8a4b07b33f797b8234ce41b Mon Sep 17 00:00:00 2001 From: Michael Heuer Date: Tue, 20 Mar 2018 13:09:21 -0500 Subject: [PATCH 16/18] Use absolute path when not using -add_files. 
--- .../scala/org/bdgenomics/cannoli/cli/Bcftools.scala | 2 +- .../scala/org/bdgenomics/cannoli/cli/CannoliFn.scala | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala index c1a82705..563a6be0 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala @@ -76,7 +76,7 @@ class BcftoolsFn( .setExecutable(args.executable) .add("norm") .add("--fasta-ref") - .add(if (args.addFiles) "$0" else args.referencePath) + .add(if (args.addFiles) "$0" else absolute(args.referencePath)) if (args.addFiles) { builder.addFile(args.referencePath) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/CannoliFn.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/CannoliFn.scala index b6847989..0892a5da 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/CannoliFn.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/CannoliFn.scala @@ -30,6 +30,18 @@ import org.apache.spark.SparkContext */ abstract class CannoliFn[X, Y](val sc: SparkContext) extends Function1[X, Y] { + def absolute(pathName: String): String = { + val path = new Path(pathName) + + // get the underlying fs for the file + val fs = Option(path.getFileSystem(sc.hadoopConfiguration)).getOrElse( + throw new FileNotFoundException( + s"Couldn't find filesystem for ${path.toUri} with Hadoop configuration ${sc.hadoopConfiguration}" + )) + + Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path)).toString + } + def root(pathName: String): String = { val path = new Path(pathName) From e636667b242d8fc0c5b4304ed0897456ae362846 Mon Sep 17 00:00:00 2001 From: Michael Heuer Date: Wed, 21 Mar 2018 11:44:26 -0500 Subject: [PATCH 17/18] Update pipe method calls due to latest ADAM 0.24.0 snapshot. 
--- cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala | 4 +++- cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala | 4 +++- cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala | 4 +++- cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala | 4 +++- cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala | 4 +++- cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala | 3 ++- cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala | 3 ++- cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala | 4 +++- cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala | 4 +++- 9 files changed, 25 insertions(+), 9 deletions(-) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala index 563a6be0..61d4d956 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala @@ -20,6 +20,7 @@ package org.bdgenomics.cannoli.cli import htsjdk.samtools.ValidationStringency import org.apache.hadoop.fs.{ FileSystem, Path } import org.apache.spark.SparkContext +import org.bdgenomics.adam.models.VariantContext import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.variant.{ @@ -27,6 +28,7 @@ import org.bdgenomics.adam.rdd.variant.{ VCFInFormatter, VCFOutFormatter } +import org.bdgenomics.adam.sql.{ VariantContext => VariantContextProduct } import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging @@ -96,7 +98,7 @@ class BcftoolsFn( implicit val tFormatter = VCFInFormatter implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration) - variantContexts.pipe( + variantContexts.pipe[VariantContext, VariantContextProduct, VariantContextRDD, VCFInFormatter]( cmd = builder.build(), files = builder.getFiles() ) diff --git 
a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala index 3152dccb..b9dea64e 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala @@ -27,7 +27,9 @@ import org.bdgenomics.adam.rdd.feature.{ BEDInFormatter, BEDOutFormatter } +import org.bdgenomics.adam.sql.{ Feature => FeatureProduct } import org.bdgenomics.cannoli.builder.CommandBuilders +import org.bdgenomics.formats.avro.Feature; import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } @@ -116,7 +118,7 @@ class BedtoolsFn( implicit val tFormatter = BEDInFormatter implicit val uFormatter = new BEDOutFormatter - features.pipe( + features.pipe[Feature, FeatureProduct, FeatureRDD, BEDInFormatter]( cmd = builder.build(), files = builder.getFiles() ) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala index afd64d10..6be7198b 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala @@ -24,7 +24,9 @@ import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.fragment.{ FragmentRDD, InterleavedFASTQInFormatter } import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, AnySAMOutFormatter } +import org.bdgenomics.adam.sql.{ AlignmentRecord => AlignmentRecordProduct } import org.bdgenomics.cannoli.builder.CommandBuilders +import org.bdgenomics.formats.avro.AlignmentRecord import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } @@ -96,7 +98,7 @@ class BowtieFn( implicit val tFormatter = InterleavedFASTQInFormatter implicit val uFormatter = new AnySAMOutFormatter - fragments.pipe( + 
fragments.pipe[AlignmentRecord, AlignmentRecordProduct, AlignmentRecordRDD, InterleavedFASTQInFormatter]( cmd = builder.build(), files = builder.getFiles() ) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala index cffecb61..a7ee6d9a 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala @@ -24,7 +24,9 @@ import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.fragment.{ FragmentRDD, InterleavedFASTQInFormatter } import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, AnySAMOutFormatter } +import org.bdgenomics.adam.sql.{ AlignmentRecord => AlignmentRecordProduct } import org.bdgenomics.cannoli.builder.CommandBuilders +import org.bdgenomics.formats.avro.AlignmentRecord import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } @@ -96,7 +98,7 @@ class Bowtie2Fn( implicit val tFormatter = InterleavedFASTQInFormatter implicit val uFormatter = new AnySAMOutFormatter - fragments.pipe( + fragments.pipe[AlignmentRecord, AlignmentRecordProduct, AlignmentRecordRDD, InterleavedFASTQInFormatter]( cmd = builder.build(), files = builder.getFiles() ) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala index 84bfe639..ef2bc525 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala @@ -25,8 +25,10 @@ import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.fragment.{ FragmentRDD, InterleavedFASTQInFormatter } import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, AnySAMOutFormatter } +import org.bdgenomics.adam.sql.{ AlignmentRecord => 
AlignmentRecordProduct } import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.cannoli.util.QuerynameGrouper +import org.bdgenomics.formats.avro.AlignmentRecord import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } @@ -141,7 +143,7 @@ class BwaFn( implicit val tFormatter = InterleavedFASTQInFormatter implicit val uFormatter = new AnySAMOutFormatter - fragments.pipe( + fragments.pipe[AlignmentRecord, AlignmentRecordProduct, AlignmentRecordRDD, InterleavedFASTQInFormatter]( cmd = builder.build(), files = builder.getFiles() ) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala index d6481f53..d18c0506 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala @@ -27,6 +27,7 @@ import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, BAMInFormatter } import org.bdgenomics.adam.rdd.variant.{ VariantContextRDD, VCFOutFormatter } +import org.bdgenomics.adam.sql.{ VariantContext => VariantContextProduct } import org.bdgenomics.adam.util.FileExtensions._ import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.utils.cli._ @@ -111,7 +112,7 @@ class FreebayesFn( implicit val tFormatter = BAMInFormatter implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration, Some(accumulator)) - val variantContexts = alignments.pipe[VariantContext, VariantContextRDD, BAMInFormatter]( + val variantContexts = alignments.pipe[VariantContext, VariantContextProduct, VariantContextRDD, BAMInFormatter]( cmd = builder.build(), files = builder.getFiles() ) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala index 
d1a963a5..f123f949 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala @@ -25,6 +25,7 @@ import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, BAMInFormatter } import org.bdgenomics.adam.rdd.variant.{ VariantContextRDD, VCFOutFormatter } +import org.bdgenomics.adam.sql.{ VariantContext => VariantContextProduct } import org.bdgenomics.adam.util.FileExtensions._ import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.utils.cli._ @@ -98,7 +99,7 @@ class SamtoolsFn( implicit val tFormatter = BAMInFormatter implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration) - alignments.pipe[VariantContext, VariantContextRDD, BAMInFormatter]( + alignments.pipe[VariantContext, VariantContextProduct, VariantContextRDD, BAMInFormatter]( cmd = builder.build(), files = builder.getFiles() ) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala index 0d944479..ee5763bc 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala @@ -19,6 +19,7 @@ package org.bdgenomics.cannoli.cli import htsjdk.samtools.ValidationStringency import org.apache.spark.SparkContext +import org.bdgenomics.adam.models.VariantContext import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.variant.{ @@ -26,6 +27,7 @@ import org.bdgenomics.adam.rdd.variant.{ VCFInFormatter, VCFOutFormatter } +import org.bdgenomics.adam.sql.{ VariantContext => VariantContextProduct } import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging @@ -85,7 +87,7 @@ class SnpEffFn( implicit val tFormatter = VCFInFormatter implicit val 
uFormatter = new VCFOutFormatter(sc.hadoopConfiguration) - variantContexts.pipe( + variantContexts.pipe[VariantContext, VariantContextProduct, VariantContextRDD, VCFInFormatter]( cmd = builder.build(), files = builder.getFiles() ) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala index aaf4a50b..308744c5 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala @@ -20,6 +20,7 @@ package org.bdgenomics.cannoli.cli import htsjdk.samtools.ValidationStringency import org.apache.hadoop.fs.{ FileSystem, Path } import org.apache.spark.SparkContext +import org.bdgenomics.adam.models.VariantContext import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.variant.{ @@ -27,6 +28,7 @@ import org.bdgenomics.adam.rdd.variant.{ VCFInFormatter, VCFOutFormatter } +import org.bdgenomics.adam.sql.{ VariantContext => VariantContextProduct } import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging @@ -99,7 +101,7 @@ class VtFn( implicit val tFormatter = VCFInFormatter implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration) - variantContexts.pipe( + variantContexts.pipe[VariantContext, VariantContextProduct, VariantContextRDD, VCFInFormatter]( cmd = builder.build(), files = builder.getFiles() ) From b8c3c0b5ca286d3cee0a260a88e7cf371baede11 Mon Sep 17 00:00:00 2001 From: Michael Heuer Date: Wed, 21 Mar 2018 12:07:12 -0500 Subject: [PATCH 18/18] Use absolute path when not using -add_files. 
--- cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala | 4 ++-- cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala | 2 +- cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala | 2 +- cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala | 2 +- cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala | 2 +- cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala index b9dea64e..1e58ff1d 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala @@ -98,9 +98,9 @@ class BedtoolsFn( .setExecutable(args.executable) .add("intersect") .add("-a") - .add(optA.fold("stdin")(if (args.addFiles) "$0" else _)) + .add(optA.fold("stdin")(f => if (args.addFiles) "$0" else absolute(f))) .add("-b") - .add(optB.fold("stdin")(if (args.addFiles) "$0" else _)) + .add(optB.fold("stdin")(f => if (args.addFiles) "$0" else absolute(f))) if (args.sorted) builder.add("-sorted") if (args.addFiles) builder.addFile(file) diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala index 6be7198b..bb894d32 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala @@ -74,7 +74,7 @@ class BowtieFn( val builder = CommandBuilders.create(args.useDocker, args.useSingularity) .setExecutable(args.executable) .add("-S") - .add(if (args.addFiles) "$0" else args.indexPath) + .add(if (args.addFiles) "$0" else absolute(args.indexPath)) .add("--interleaved") .add("-") diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala index a7ee6d9a..5079f11f 100644 --- 
a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala @@ -74,7 +74,7 @@ class Bowtie2Fn( val builder = CommandBuilders.create(args.useDocker, args.useSingularity) .setExecutable(args.executable) .add("-x") - .add(if (args.addFiles) "$0" else args.indexPath) + .add(if (args.addFiles) "$0" else absolute(args.indexPath)) .add("--interleaved") .add("-") diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala index d18c0506..c1e16dfe 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala @@ -83,7 +83,7 @@ class FreebayesFn( var builder = CommandBuilders.create(args.useDocker, args.useSingularity) .setExecutable(args.executable) .add("--fasta-reference") - .add(if (args.addFiles) "$0" else args.referencePath) + .add(if (args.addFiles) "$0" else absolute(args.referencePath)) .add("--stdin") .add("--strict-vcf") diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala index f123f949..60f23f6b 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala @@ -77,7 +77,7 @@ class SamtoolsFn( .add("mpileup") .add("-") .add("--reference") - .add(if (args.addFiles) "$0" else args.referencePath) + .add(if (args.addFiles) "$0" else absolute(args.referencePath)) .add("-v") .add("-u") diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala index 308744c5..5daa3837 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala @@ -82,7 +82,7 @@ class VtFn( .add("normalize") .add("-") .add("-r") - .add(if (args.addFiles) "$0" else args.referencePath) + 
.add(if (args.addFiles) "$0" else absolute(args.referencePath)) Option(args.window).foreach(i => builder.add("-w").add(i.toString))