Skip to content

Commit

Permalink
GraphQL endpoint for clinical data and cleanup
Browse files Browse the repository at this point in the history
Signed-off-by: Zachary Heins <zackheins@gmail.com>

Query the GDC GraphQL endpoint based on the manifest file

Establish a framework for querying the GDC GraphQL endpoint
Create a mapping of files -> cases for subsequent steps
Create a list of all cases from the files for subsequent steps

Clinical data uses GraphQL + the GraphQL util

Mutation step fixes
  • Loading branch information
zheins committed Jun 27, 2019
1 parent dcfb059 commit 58efdf1
Show file tree
Hide file tree
Showing 840 changed files with 770 additions and 208,925 deletions.
Expand Up @@ -2,12 +2,9 @@

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.cbio.gdcpipeline.decider.ClinicalFileTypeDecider;
import org.cbio.gdcpipeline.decider.StepDecider;
import org.cbio.gdcpipeline.tasklet.BiospecimenXmlDataTasklet;
import org.cbio.gdcpipeline.tasklet.ProcessManifestFileTasklet;
import org.cbio.gdcpipeline.tasklet.SetUpPipelineTasklet;
import org.cbio.gdcpipeline.util.CommonDataUtil;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
Expand Down Expand Up @@ -65,7 +62,7 @@ public Step setUpPipeline() {

@Bean
public ExecutionContextPromotionListener processManifestFileListener() {
String[] keys = new String[]{"gdcFileMetadatas"};
String[] keys = new String[]{"gdcManifestData", "caseIds"};
ExecutionContextPromotionListener executionContextPromotionListener = new ExecutionContextPromotionListener();
executionContextPromotionListener.setKeys(keys);
return executionContextPromotionListener;
Expand All @@ -92,35 +89,10 @@ public Tasklet setUpPipelineTasklet() {
return new SetUpPipelineTasklet();
}


/**
 * Step-scoped bean providing the tasklet used by {@code biospecimenXmlDataStep}.
 *
 * @return a new {@link BiospecimenXmlDataTasklet}
 */
@Bean
@StepScope
public Tasklet biospecimenXmlDataTasklet() {
    final Tasklet biospecimenTasklet = new BiospecimenXmlDataTasklet();
    return biospecimenTasklet;
}

/**
 * Creates the promotion listener registered on {@code biospecimenXmlDataStep}.
 *
 * <p>The listener is configured with the single key {@code barcodeToSamplesMap}.
 *
 * @return the configured {@link ExecutionContextPromotionListener}
 */
@Bean
public ExecutionContextPromotionListener biospecimenXmlDataListener() {
    ExecutionContextPromotionListener promotionListener = new ExecutionContextPromotionListener();
    promotionListener.setKeys(new String[]{"barcodeToSamplesMap"});
    return promotionListener;
}

/**
 * Builds the step named {@code biospecimenXmlDataStep}.
 *
 * <p>The step registers {@link #biospecimenXmlDataListener()} as a listener and runs
 * {@link #biospecimenXmlDataTasklet()}.
 *
 * @return the built step
 */
@Bean
public Step biospecimenXmlDataStep() {
return stepBuilderFactory.get("biospecimenXmlDataStep")
.listener(biospecimenXmlDataListener())
.tasklet(biospecimenXmlDataTasklet())
.build();
}

@Bean
public Flow clinicalXmlDataFlow() {
return new FlowBuilder<Flow>("clinicalXmlDataFlow")
.start(biospecimenXmlDataStep())
.next(clinicalDataStep)
public Flow clinicalDataFlow() {
return new FlowBuilder<Flow>("clinicalDataFlow")
.start(clinicalDataStep)
.next(clinicalMetaDataStep)
.build();
}
Expand All @@ -134,25 +106,10 @@ public Flow mutationDataFlow() {
.build();
}

/**
 * Bean providing the decider that {@code clinicalFileTypeDeciderFlow} starts from.
 *
 * @return a new {@link ClinicalFileTypeDecider}
 */
@Bean
public JobExecutionDecider clinicalFileTypeDecider() {
    final JobExecutionDecider fileTypeDecider = new ClinicalFileTypeDecider();
    return fileTypeDecider;
}

/**
 * Builds the flow named {@code clinicalFileTypeDeciderFlow}.
 *
 * <p>The flow starts at {@link #clinicalFileTypeDecider()}. A status matching
 * {@code GDC_DATAFORMAT.BCR_XML} routes to {@code clinicalXmlDataFlow()}, and a
 * {@code "FAIL"} transition fails the flow.
 *
 * <p>NOTE(review): the {@code "FAIL"} transition is chained directly after {@code .to(...)}
 * without a {@code .from(clinicalFileTypeDecider())}, unlike the transitions in
 * {@code stepDeciderFlow}. Confirm which state this transition is meant to apply to.
 *
 * @return the built flow
 */
@Bean
public Flow clinicalFileTypeDeciderFlow() {
return new FlowBuilder<Flow>("clinicalFileTypeDeciderFlow")
.start(clinicalFileTypeDecider())
.on(CommonDataUtil.GDC_DATAFORMAT.BCR_XML.toString()).to(clinicalXmlDataFlow())
.on("FAIL").fail()
.build();
}

@Bean
public Flow gdcAllDatatypesFlow() {
return new FlowBuilder<Flow>("gdcAllDatatypesFlow")
.start(clinicalFileTypeDecider())
.on(CommonDataUtil.GDC_DATAFORMAT.BCR_XML.toString()).to(clinicalXmlDataFlow())
.start(clinicalDataFlow())
.next(mutationDataFlow())
.build();
}
Expand All @@ -169,7 +126,7 @@ public Flow stepDeciderFlow() {
return new FlowBuilder<Flow>("stepDeciderFlow")
.start(stepDecider())
.on(StepDecider.STEP.ALL.toString()).to(gdcAllDatatypesFlow())
.from(stepDecider()).on(StepDecider.STEP.CLINICAL.toString()).to(clinicalFileTypeDeciderFlow())
.from(stepDecider()).on(StepDecider.STEP.CLINICAL.toString()).to(clinicalDataFlow())
.from(stepDecider()).on(StepDecider.STEP.MUTATION.toString()).to(mutationDataFlow())
.build();
}
Expand Down
Expand Up @@ -2,16 +2,17 @@

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.cbio.gdcpipeline.model.rest.response.Hits;
import org.cbio.gdcpipeline.util.CommonDataUtil;
import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.StepExecutionListener;
import org.springframework.beans.factory.annotation.Value;

import java.io.File;
import java.nio.file.*;
import java.util.ArrayList;
import java.util.List;
import org.cbio.gdcpipeline.model.ManifestFileData;

/**
* @author Dixit Patel
Expand All @@ -31,8 +32,8 @@ public class MutationStepListener implements StepExecutionListener {
@Value("#{jobParameters[outputDirectory]}")
private String outputDir;

@Value("#{jobExecutionContext[gdcFileMetadatas]}")
private List<Hits> gdcFileMetadatas;
@Value("#{jobExecutionContext[gdcManifestData]}")
private List<ManifestFileData> gdcManifestData;

@Value("#{jobParameters[separate_mafs]}")
private String separate_mafs;
Expand Down Expand Up @@ -87,7 +88,25 @@ public ExitStatus afterStep(StepExecution stepExecution) {
}

/**
 * Builds the list of mutation files named in the manifest data.
 *
 * <p>Every manifest entry whose normalized datatype equals {@code GDC_TYPE.MUTATION} is
 * resolved to {@code <sourceDir>/<id>/<filename>}. A non-empty list is passed to
 * {@code CommonDataUtil.extractCompressedFiles}; if that call throws, the failure is logged
 * together with its cause and the unextracted list is returned.
 *
 * @return the mutation files after extraction; an empty list when the manifest data is
 *         missing or contains no mutation entries
 */
public List<File> getMutationFileList() {
    List<File> mutationFileList = new ArrayList<>();
    // The manifest data is injected from the job execution context and may be absent.
    if (gdcManifestData != null) {
        for (ManifestFileData dataFile : gdcManifestData) {
            if (CommonDataUtil.GDC_TYPE.MUTATION.toString().equals(dataFile.getNormalizedDatatype())) {
                Path path = Paths.get(sourceDir, dataFile.getId(), dataFile.getFilename());
                mutationFileList.add(path.toFile());
            }
        }
    }
    if (mutationFileList.isEmpty()) {
        LOG.error("Mutation file list empty");
        return mutationFileList;
    }
    try {
        mutationFileList = CommonDataUtil.extractCompressedFiles(mutationFileList);
    } catch (Exception e) {
        // Pass the exception along so its cause and stack trace reach the log.
        LOG.error("Could not extract maf files!", e);
    }
    return mutationFileList;
}

}
15 changes: 15 additions & 0 deletions src/main/java/org/cbio/gdcpipeline/model/ClinicalMetadataImpl.java
Expand Up @@ -5,6 +5,9 @@
/**
* @author Dixit Patel
*/

// TODO: Replace this class with dynamic calls to the data dictionary API; see
// oncotree.mskcc.org/cdd/swagger-ui.html
public class ClinicalMetadataImpl implements MetadataManager {
private Map<String, String> displayNames = new HashMap<>();
private Map<String, String> description = new HashMap<>();
Expand Down Expand Up @@ -48,6 +51,9 @@ private void setDisplayNames() {
this.displayNames.put("OS_STATUS", "Overall Survival Status");
this.displayNames.put("SEX", "Sex");
this.displayNames.put("AGE", "Age");
this.displayNames.put("SAMPLE_TYPE", "Sample Type");
this.displayNames.put("PRIMARY_SITE", "Primary Tumor Site");
this.displayNames.put("CANCER_TYPE", "Cancer Type");
}

private void setDescription() {
Expand All @@ -57,6 +63,9 @@ private void setDescription() {
this.description.put("OS_STATUS", "Overall Survival Status");
this.description.put("SEX", "Sex");
this.description.put("AGE", "Age");
this.description.put("SAMPLE_TYPE", "The type of sample (i.e., normal, primary, met, recurrence).");
this.description.put("PRIMARY_SITE", "Text term to describe the organ sub-division in an individual with cancer.");
this.description.put("CANCER_TYPE", "Cancer Type");
}

private void setDatatype() {
Expand All @@ -66,6 +75,9 @@ private void setDatatype() {
this.datatype.put("OS_STATUS", "STRING");
this.datatype.put("SEX", "STRING");
this.datatype.put("AGE", "NUMBER");
this.datatype.put("SAMPLE_TYPE", "STRING");
this.datatype.put("PRIMARY_SITE", "STRING");
this.datatype.put("CANCER_TYPE", "STRING");
}

private void setPriority() {
Expand All @@ -75,5 +87,8 @@ private void setPriority() {
this.priority.put("OS_STATUS", "1");
this.priority.put("SEX", "1");
this.priority.put("AGE", "1");
this.priority.put("SAMPLE_TYPE", "9");
this.priority.put("PRIMARY_SITE", "1");
this.priority.put("CANCER_TYPE", "1");
}
}

0 comments on commit 58efdf1

Please sign in to comment.