Skip to content

Commit

Permalink
Merge pull request #19 from cBioPortal/gsoc-development
Browse files Browse the repository at this point in the history
gsoc-development into master
  • Loading branch information
ao508 committed May 15, 2019
2 parents 3fabdf0 + 403c828 commit 22bd35d
Show file tree
Hide file tree
Showing 863 changed files with 211,481 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Expand Up @@ -20,3 +20,7 @@

# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*

#intellij
.idea/
/target/
120 changes: 120 additions & 0 deletions pom.xml
@@ -0,0 +1,120 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build descriptor for the GDC data transformation pipeline.
  Dependency versions that are omitted below are inherited from the
  spring-boot-starter-parent dependency management.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.cbio</groupId>
    <artifactId>gdcpipeline</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>gdcpipeline</name>
    <description>GDC Data Transformation</description>

    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>1.2.7.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <java.version>1.8</java.version>
    </properties>

    <dependencies>
        <!-- Spring Boot starters -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-batch</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-oxm</artifactId>
        </dependency>

        <!-- Apache Commons utilities -->
        <dependency>
            <groupId>commons-collections</groupId>
            <artifactId>commons-collections</artifactId>
            <type>jar</type>
        </dependency>
        <dependency>
            <groupId>commons-cli</groupId>
            <artifactId>commons-cli</artifactId>
            <version>1.4</version>
        </dependency>
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.4</version>
            <type>jar</type>
        </dependency>
        <dependency>
            <groupId>commons-logging</groupId>
            <artifactId>commons-logging</artifactId>
            <version>1.2</version>
        </dependency>
        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
        </dependency>

        <!--
          Mocking libraries. mockito-all is test-scoped like the PowerMock artifacts so that
          it is not packaged into the executable jar.
          NOTE(review): assumes no class under src/main references Mockito; confirm.
        -->
        <dependency>
            <groupId>org.mockito</groupId>
            <artifactId>mockito-all</artifactId>
            <version>1.9.5</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.powermock</groupId>
            <artifactId>powermock-module-junit4</artifactId>
            <version>1.7.0</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.powermock</groupId>
            <artifactId>powermock-api-mockito</artifactId>
            <version>1.7.0</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>com.h2database</groupId>
            <artifactId>h2</artifactId>
            <version>1.3.156</version>
        </dependency>
        <!-- Resolved through the jitpack.io repository declared below; the version is a commit hash. -->
        <dependency>
            <groupId>com.github.genome-nexus</groupId>
            <artifactId>genome-nexus-annotation-pipeline</artifactId>
            <version>b97fa34b24dd5083f055cafa31867e69e0bcfb7c</version>
        </dependency>
        <dependency>
            <groupId>commons-lang</groupId>
            <artifactId>commons-lang</artifactId>
            <version>2.6</version>
            <type>jar</type>
        </dependency>
    </dependencies>

    <repositories>
        <repository>
            <id>jitpack.io</id>
            <url>https://jitpack.io</url>
        </repository>
    </repositories>
    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>
128 changes: 128 additions & 0 deletions src/main/java/org/cbio/gdcpipeline/GDCPipelineApplication.java
@@ -0,0 +1,128 @@
package org.cbio.gdcpipeline;

import org.apache.commons.cli.*;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.cbio.gdcpipeline.util.CommonDataUtil;

import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.JobParametersBuilder;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.ConfigurableApplicationContext;

/**
 * Command-line entry point for the GDC data transformation pipeline.
 *
 * <p>Parses and validates the command-line options, then starts the Spring application
 * context and runs the batch job registered under the bean name {@code gdcJob}, passing the
 * options along as job parameters. A usage error prints the help text and terminates the
 * JVM with a non-zero exit status so that calling scripts can detect the failure.
 *
 * @author Dixit Patel
 */
@SpringBootApplication
public class GDCPipelineApplication {
    private static final Log LOG = LogFactory.getLog(GDCPipelineApplication.class);
    private final static String GDC_JOB = "gdcJob";
    private final static String DEFAULT_DATATYPES = "ALL";
    private final static String DEFAULT_FILTER_NORMAL_SAMPLE = "true";
    private final static String DEFAULT_SEPARATE_MAF_FILES = "false";
    private final static String DEFAULT_ISOFORM_OVERRIDE_SOURCE = "uniprot";
    private final static String DEFAULT_REFERENCE_GENOME_BUILD = CommonDataUtil.REFERENCE_GENOME.GRCh37.toString();

    /** Exit status used when help was explicitly requested. */
    private static final int EXIT_OK = 0;
    /** Exit status used when the command line is invalid. */
    private static final int EXIT_USAGE_ERROR = 1;

    /**
     * Builds the set of command-line options understood by the pipeline.
     *
     * @param input the raw command-line arguments (currently not consulted)
     * @return the option definitions
     */
    private static Options getOptions(String[] input) {
        Options options = new Options();
        options.addOption("s", "source", true, "source directory for files");
        options.addOption("o", "output", true, "output directory for files");
        options.addOption("c", "cancer_study_id", true, "Cancer Study Id");
        options.addOption("m", "manifest_file", true, "Manifest file path");
        options.addOption("f", "filter_normal_sample", true, "True or False. Flag to filter normal samples. Default is True ");
        options.addOption("d", "datatypes", true, "Datatypes to run. Default is All");
        options.addOption("separate_mafs", "separate_mafs", true, "True or False. Process MAF files individually or merge together. Default is False");
        options.addOption("i", "isoformOverrideSource", true, "Isoform Override Source. Default is \'uniprot\'");
        options.addOption("r", "reference_genome_build", true, "Reference Genome to use for processing MAF. Default is GRCh37");
        options.addOption("h", "help", false, "shows this help document and quits.");
        return options;
    }

    /**
     * Prints an optional message followed by the usage text, then terminates the JVM.
     *
     * @param gnuOptions the option definitions to describe
     * @param exitStatus the status passed to {@link System#exit(int)}
     * @param opt        a message printed before the usage text; skipped when null or empty
     */
    private static void help(Options gnuOptions, int exitStatus, String opt) {
        HelpFormatter helpFormatter = new HelpFormatter();
        if (opt != null && !opt.isEmpty()) {
            System.out.println(opt);
        }
        helpFormatter.printHelp(" Pipeline Options ", gnuOptions);
        System.exit(exitStatus);
    }

    /**
     * Starts the Spring application context and runs the {@code gdcJob} batch job with the
     * given values as string job parameters.
     *
     * @throws Exception if the context cannot be started or the job cannot be launched
     */
    private static void launchJob(String[] args, String sourceDirectory, String outputDirectory, String cancer_study_id, String manifest_file, String filter_normal_sample, String datatypes, String separate_mafs, String isoformOverrideSource, String reference_genome_build) throws Exception {
        SpringApplication app = new SpringApplication(GDCPipelineApplication.class);
        ConfigurableApplicationContext ctx = app.run(args);
        Job gdcJob = ctx.getBean(GDC_JOB, Job.class);
        JobLauncher jobLauncher = ctx.getBean(JobLauncher.class);
        JobParameters jobParameters = new JobParametersBuilder()
                .addString("sourceDirectory", sourceDirectory)
                .addString("outputDirectory", outputDirectory)
                .addString("cancer_study_id", cancer_study_id)
                .addString("manifest_file", manifest_file)
                .addString("filter_normal_sample", filter_normal_sample)
                .addString("datatypes", datatypes)
                .addString("separate_mafs", separate_mafs)
                .addString("isoformOverrideSource", isoformOverrideSource)
                .addString("reference_genome_build", reference_genome_build)
                .toJobParameters();
        JobExecution jobExecution = jobLauncher.run(gdcJob, jobParameters);
        // Surface the outcome instead of silently discarding the execution handle.
        LOG.info("Job '" + GDC_JOB + "' returned with status " + jobExecution.getStatus());
    }

    /**
     * Resolves a true/false option to the canonical string {@code "true"} or {@code "false"}.
     * The comparison is case-insensitive. Any other value prints {@code errorMessage} with
     * the usage text and terminates the JVM.
     *
     * @param cli          the parsed command line
     * @param options      the option definitions, used when printing usage
     * @param name         the long name of the option
     * @param defaultValue the value returned when the option is absent
     * @param errorMessage the message shown when the value is neither true nor false
     * @return {@code "true"}, {@code "false"}, or {@code defaultValue} when the option is absent
     */
    private static String resolveBooleanOption(CommandLine cli, Options options, String name, String defaultValue, String errorMessage) {
        if (!cli.hasOption(name)) {
            return defaultValue;
        }
        String value = cli.getOptionValue(name);
        if ("true".equalsIgnoreCase(value)) {
            return "true";
        }
        if ("false".equalsIgnoreCase(value)) {
            return "false";
        }
        help(options, EXIT_USAGE_ERROR, errorMessage);
        return defaultValue; // not reached: help() terminates the JVM
    }

    /**
     * Parses and validates the command line, then launches the batch job.
     *
     * <p>The {@code source}, {@code output} and {@code manifest_file} options are required;
     * the remaining options fall back to the defaults declared on this class.
     *
     * @param args the raw command-line arguments
     * @throws Exception if launching the job fails
     */
    public static void main(String[] args) throws Exception {
        Options options = GDCPipelineApplication.getOptions(args);
        CommandLineParser parser = new DefaultParser();
        CommandLine cli;
        try {
            cli = parser.parse(options, args);
        } catch (ParseException e) {
            // Unknown option or missing argument: show usage rather than a stack trace.
            GDCPipelineApplication.help(options, EXIT_USAGE_ERROR, e.getMessage());
            return; // not reached: help() terminates the JVM
        }

        if (cli.hasOption("help")) {
            GDCPipelineApplication.help(options, EXIT_OK, "");
        }
        if (!cli.hasOption("source")) {
            GDCPipelineApplication.help(options, EXIT_USAGE_ERROR, "Source directory of files must be specified");
        }
        if (!cli.hasOption("output")) {
            GDCPipelineApplication.help(options, EXIT_USAGE_ERROR, "Output directory for files must be specified");
        }
        if (!cli.hasOption("manifest_file")) {
            GDCPipelineApplication.help(options, EXIT_USAGE_ERROR, "Manifest file path must be specified");
        }

        String datatypes = DEFAULT_DATATYPES;
        if (cli.hasOption("datatypes")) {
            datatypes = cli.getOptionValue("datatypes");
        }

        String filter_normal_sample = resolveBooleanOption(cli, options, "filter_normal_sample",
                DEFAULT_FILTER_NORMAL_SAMPLE, "Filter Option must either be True or False");

        String separate_mafs = resolveBooleanOption(cli, options, "separate_mafs",
                DEFAULT_SEPARATE_MAF_FILES, "MAF File Option must either be True or False");

        String isoformOverrideSource = DEFAULT_ISOFORM_OVERRIDE_SOURCE;
        if (cli.hasOption("isoformOverrideSource")) {
            isoformOverrideSource = cli.getOptionValue("isoformOverrideSource");
        }

        String reference_genome_build = DEFAULT_REFERENCE_GENOME_BUILD;
        if (cli.hasOption("reference_genome_build")) {
            reference_genome_build = cli.getOptionValue("reference_genome_build");
            if (!(CommonDataUtil.REFERENCE_GENOME.build37.contains(reference_genome_build))) {
                GDCPipelineApplication.help(options, EXIT_USAGE_ERROR, reference_genome_build + " reference genome build is not currently supported.");
            }
        }

        // NOTE(review): cancer_study_id is not validated and is passed as null when the
        // option is absent; confirm the job tolerates a missing value.
        launchJob(args, cli.getOptionValue("source"), cli.getOptionValue("output"), cli.getOptionValue("cancer_study_id"), cli.getOptionValue("manifest_file"), filter_normal_sample, datatypes, separate_mafs, isoformOverrideSource, reference_genome_build);
    }
}

0 comments on commit 22bd35d

Please sign in to comment.