Permalink
Browse files

cleaning up some of the parameters for tangent that plug through to CapSeg.scala
  • Loading branch information...
1 parent 49997f0 commit 2713632a45b24d65481a8f9cbe3959a3e23ef062 @aaronmck committed Aug 3, 2013
Showing with 22 additions and 30 deletions.
  1. +6 −6 R/tangent_normalize.R
  2. +16 −24 utils/CapSeg.scala
View
@@ -15,20 +15,17 @@ option.list <- list(
make_option(c("--tumor.lane.data"),help="the tumor exome coverage: lanes as rows, exome targets (or baits) as rows",default="blank"),
make_option(c("--target.list"),help="the list of targets we captured in sequencing",default="blank"),
make_option(c("--script.dir"),help="where we can find the wesseg scripts - where you placed the checked out tool into",default="blank"),
- make_option(c("--normal.sample.to.bam.file"),help="the file containing the mapping of the sample names (for normals) to bam files",default="blank"),
- make_option(c("--tumor.sample.to.bam.file"),help="the file containing the mapping of the sample names (for tumors) to bam files",default="blank"),
+ make_option(c("--normal.sample.bams"),help="the file containing the mapping of the sample names (for normals) to bam files",default="blank"),
+ make_option(c("--tumor.sample.bams"),help="the file containing the mapping of the sample names (for tumors) to bam files",default="blank"),
make_option(c("--output.location"),help="where to write output files to - the segmentation results plus any graphs",default="blank"),
make_option(c("--tangent.database.location"),help="the directory of tangent planes to normalize against; this directory should contain only tangent planes",default="blank"),
make_option(c("--output.tangent.database"),help="the directory where we put the output tangent data",default="blank"),
make_option(c("--build"),help="are we running with hg18 and hg19",default="blank"),
make_option(c("--analysis.set.name"),help="what was the name of the analysis set",default="blank"),
- make_option(c("--bylane"),help="is the data coming in by lane? (if not it should be by sample)",default="blank"),
- make_option(c("--parallel"),help="should we merge lanes to samples in parallel",default="blank"),
make_option(c("--bait.factor"),help="the bait factor data file",default="blank"),
make_option(c("--bam.file.listing"),help="the listing of bam files, by tumor and by normal",default="blank"),
make_option(c("--signal.files"),help="the sample name to signal file",default="blank"),
make_option(c("--use.histo.data"),help="should we use historical data",default="blank"),
- make_option(c("--sex.calls"),help="a file mapping the id to the sex of each patient",default="blank"),
make_option(c("--debug"),help="dump lots of debugging data to the <output_dir>/debug directory",default="blank")
)
opt <- parse_args(OptionParser(option_list=option.list))
@@ -154,8 +151,11 @@ female.normals <- normal.data[,is.element(colnames(normal.data),sex.calls$sample
male.calibrated <- calibrate.and.pi.tumors(male.tumors,male.normals)
female.calibrated <- calibrate.and.pi.tumors(female.tumors,female.normals)
+# put together the calibrated data
+calibrated.tumors <- cbind(male.calibrated,female.calibrated)
+
# save off the data to a Rdata object for later
-save.off.processed.data(log.normals,log.tumors,calibrated.tumors,baits,paste(tangent.database.output,build.version,sep="/"),analysis.set.name,build.version)
+save.off.processed.data(log2(normal.data),log2(tumor.data),calibrated.tumors,baits,paste(tangent.database.output,build.version,sep="/"),analysis.set.name,build.version)
# output the raw data and plots for each sample
output.and.plot.data(calibrated.tumors,tumor.data,baits,output.location,signal.files)
View
@@ -236,25 +236,22 @@ class CapSeg extends QScript {
firehoseInport.close()
// run the final R script which puts it all together
- add(new PostProcessData(libraryDir,
+ add(new PostProcessData(samples,
+ libraryDir,
normalSampleFile,
tumorSampleFile,
baitCSV,
- true,
- new File(outputDir.getAbsolutePath() + "/.cache"),
- normalSampleToPGFile,
- tumorSampleToPGFile,
+ normalBamToSampleFile,
+ normalBamToSampleFile,
outputDir,
tangentLocation,
tangentOutputLocation,
build,
analysisSet,
- perLane,
finalNormalMatrixBF,
signal,
signalFile,
- useHistData,
- alleleOutput))
+ useHistData))
}
@@ -387,36 +384,31 @@ class SegmentSample(libraryDir: File, bamName: String, bamToSample: File, output
// post process the data using the R script
-class PostProcessData(libraryDir: File, normal_bait_coverage: File, tumor_bait_coverage: File, target_file: File,
- useCachedData: Boolean, cachedLocation: File, sToRGFileNormals: File, sToRGFileTumors: File,
- outputDir: File, tangentLocation: File, tangentOutputLocation: File, buildType: String, analysisSet: String,
- byLane: Boolean, normalBaitFile: File, signalFLs: List[File], signalTSV: File, histoData: Boolean, acovFiles: List[File]) extends CommandLineFunction {
- @Input(doc = "the normal bait output file") var normalFile = normal_bait_coverage
- @Input(doc = "the tumor bait output file") var tumorFile = tumor_bait_coverage
+class PostProcessData(indTable: File, libraryDir: File, normal_bams: File, tumor_bams: File, target_file: File,
+ normalBamListing: File, tumorBamListing: File, outputDir: File, tangentLocation: File, tangentOutputLocation: File,
+ buildType: String, analysisSet: String,
+ normalBaitFile: File, signalFLs: List[File], signalTSV: File, histoData: Boolean) extends CommandLineFunction {
+ @Input(doc = "the sample table containing individuals and their t/n bam files") var inds = indTable
+ @Input(doc = "the normal bait output file") var normalFile = normal_bams
+ @Input(doc = "the tumor bait output file") var tumorFile = tumor_bams
@Input(doc = "the target list") var targetFile = target_file
@Input(doc = "where can we find the WES segmentation algorithm") var libDir = libraryDir
- @Input(doc = "the file containing the mapping of sample to read group (normals)") var normalSampleToReadGroupFile = sToRGFileNormals
- @Input(doc = "the file containing the mapping of sample to read group (tumors)") var tumorSampleToReadGroupFile = sToRGFileTumors
+ @Input(doc = "the file containing the mapping of sample to bams (normals)") var normalBams = normalBamListing
+ @Input(doc = "the file containing the mapping of sample to bams (tumors)") var tumorBams = tumorBamListing
@Input(doc = "the output location") var outputDirectory = outputDir
@Input(doc = "tangent normalization database location") var tangentDatabase = tangentLocation
@Input(doc = "tangent normalization database location") var tangentOutputDatabase = tangentOutputLocation
@Input(doc = "signal tsv file") var signalTSVFile = signalTSV
- @Input(doc = "coverage files from the allele balance pulldown") var coverageFiles = acovFiles
@Output(doc = "the signal file outputs") var signalFiles = signalFLs
- @Output(doc = "the cache location") var cacheLocation = cachedLocation
@Argument(doc = "the build type, hg18 or hg19") var build = buildType
@Argument(doc = "the analysis name") var analysisSetName = analysisSet
- @Argument(doc = "are we running by lane?") var byLaneData = byLane
@Argument(doc = "the normal bait factor file") var nbf = normalBaitFile
@Argument(doc = "should we use historical data") var uhd = histoData
- //@Output(doc="the sample information file") var sampleInfoFile = new File(outputDir.getAbsolutePath() + "/sampleInformation.txt")
-
- @Argument(doc = "should we use the cache file") var useCache = useCachedData
- memoryLimit = Some(16) // change me
- def commandLine = "Rscript %s/R/tangent_normalize.R --normal.lane.data %s --tumor.lane.data %s --target.list %s --use.cache %s --cache.location %s --script.dir %s --normal.sample.to.lanes.file %s --tumor.sample.to.lanes.file %s --output.location %s --tangent.database.location %s --output.tangent.database %s --build %s --analysis.set.name %s --bylane %s --bait.factor %s --signal.files %s --histo.data %s".format(libDir.getAbsolutePath(), normalFile.getAbsolutePath(), tumorFile.getAbsolutePath(), targetFile.getAbsolutePath(), useCache, cacheLocation.getAbsolutePath(), libDir.getAbsolutePath(), normalSampleToReadGroupFile.getAbsolutePath(), tumorSampleToReadGroupFile.getAbsolutePath(), outputDirectory.getAbsolutePath(), tangentDatabase.getAbsolutePath(), tangentOutputDatabase.getAbsolutePath(), build, analysisSetName,byLaneData, nbf.getAbsolutePath(), signalTSVFile.getAbsolutePath(), uhd)
+ memoryLimit = Some(8)
+ def commandLine = "Rscript %s/R/tangent_normalize.R --sample.table %s --normal.lane.data %s --tumor.lane.data %s --target.list %s --script.dir %s --normal.sample.bams %s --tumor.sample.bams %s --output.location %s --tangent.database.location %s --output.tangent.database %s --build %s --analysis.set.name %s --bait.factor %s --signal.files %s --histo.data %s".format(libDir.getAbsolutePath(), indTable.getAbsolutePath(), normalFile.getAbsolutePath(), tumorFile.getAbsolutePath(), targetFile.getAbsolutePath(), libDir.getAbsolutePath(), normalBams.getAbsolutePath(), tumorBams.getAbsolutePath(), outputDirectory.getAbsolutePath(), tangentDatabase.getAbsolutePath(), tangentOutputDatabase.getAbsolutePath(), build, analysisSetName, nbf.getAbsolutePath(), signalTSVFile.getAbsolutePath(), uhd)
}
// correct the output files for any dups lines and extra headers

0 comments on commit 2713632

Please sign in to comment.