diff --git a/CHANGES.txt b/CHANGES.txt index 97cca145b7..d216f0a188 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,9 @@ +3.3.0 + + * `plotCoverage` now has a `--BED` option, to restrict plots and output to apply to a specific set of regions given by a BED or GTF file or files (issue #829). + * `plotCoverage` now has a `--DepthSummary` option, which produces a summary similar to GATK's DepthOfCoverage (issue #828). + * `plotCoverage` is now able to compute coverage metrics for arbitrary coverage thresholds using multiples of the `-ct` option (e.g., `-ct 0 -ct 10 -ct 20 -ct 30`). + 3.2.1 * Changed a bug in `estimateReadFiltering` where the estimated number of filtered reads was typically too low. diff --git a/deeptools/_version.py b/deeptools/_version.py index 1d0891b80f..f201aca3bc 100644 --- a/deeptools/_version.py +++ b/deeptools/_version.py @@ -2,4 +2,4 @@ # This file is originally generated from Git information by running 'setup.py # version'. Distribution tarballs contain a pre-generated copy of this file. -__version__ = '3.2.1' +__version__ = '3.3.0' diff --git a/deeptools/countReadsPerBin.py b/deeptools/countReadsPerBin.py index 62c3eedb2c..8ce4b59ae5 100644 --- a/deeptools/countReadsPerBin.py +++ b/deeptools/countReadsPerBin.py @@ -130,6 +130,9 @@ class CountReadsPerBin(object): mappedList : list For each BAM file in bamFilesList, the number of mapped reads in the file. + bed_and_bin : boolean + If true AND a bedFile is given, compute coverage of each bin of the given size in each region of bedFile + Returns ------- numpy array @@ -171,6 +174,7 @@ def __init__(self, bamFilesList, binLength=50, numberOfSamples=None, numberOfPro minFragmentLength=0, maxFragmentLength=0, out_file_for_raw_data=None, + bed_and_bin=False, statsList=[], mappedList=[]): @@ -181,6 +185,7 @@ def __init__(self, bamFilesList, binLength=50, numberOfSamples=None, numberOfPro self.statsList = statsList self.mappedList = mappedList self.skipZeroOverZero = skipZeroOverZero + self.bed_and_bin = bed_and_bin if extendReads and len(bamFilesList): from deeptools.getFragmentAndReadSize import get_read_and_fragment_length @@ -463,7 +468,10 @@ def count_reads_in_region(self, chrom, start, end, bed_regions_list=None): # A list of lists of tuples transcriptsToConsider = [] if bed_regions_list is not None: - transcriptsToConsider = [x[1] for x in bed_regions_list] + if self.bed_and_bin: + transcriptsToConsider.append([(x[1][0][0], x[1][0][1], self.binLength) for x in bed_regions_list]) + else: + transcriptsToConsider = [x[1] for x in bed_regions_list] else: if self.stepSize == self.binLength: transcriptsToConsider.append([(start, end, self.binLength)]) @@ -484,7 +492,7 @@ def count_reads_in_region(self, chrom, start, end, bed_regions_list=None): for bam in bam_handles: for trans in transcriptsToConsider: tcov = self.get_coverage_of_region(bam, chrom, trans) - if bed_regions_list is not None: + if bed_regions_list is not None and not self.bed_and_bin: subnum_reads_per_bin.append(np.sum(tcov)) else: subnum_reads_per_bin.extend(tcov) diff --git a/deeptools/plotCoverage.py b/deeptools/plotCoverage.py index d918ace0b8..dcc3cc0689 100644 --- a/deeptools/plotCoverage.py +++ b/deeptools/plotCoverage.py @@ -118,11 +118,34 @@ def required_args(): type=int, default=1000000) + optional.add_argument('--BED', + help='Limits the coverage analysis to ' + 'the regions specified in these files. This overrides --numberOfSamples. ' + 'Due to memory requirements, it is inadvised to combine this with ' + '--outRawCounts or many tens of thousands of regions, as per-base ' + 'coverage is used!', + metavar='FILE1.bed FILE2.bed', + nargs='+') + optional.add_argument('--outRawCounts', help='Save raw counts (coverages) to file.', type=parserCommon.writableFile, metavar='FILE') + optional.add_argument('--outCoverageMetrics', + help='Save percentage of bins/regions above the specified thresholds to ' + 'the specified file. The coverage thresholds are specified by ' + '--coverageThresholds. If no coverage thresholds are specified, the file ' + 'will be empty.', + type=parserCommon.writableFile, + metavar='FILE') + + optional.add_argument('--coverageThresholds', '-ct', + type=int, + action="append", + help='The percentage of reported bins/regions with signal at least as ' + 'high as the given threshold. This can be specified multiple times.') + optional.add_argument('--plotHeight', help='Plot height in cm.', type=float, @@ -148,11 +171,17 @@ def required_args(): def main(args=None): args = process_args(args) - if args.outRawCounts is None and args.plotFile is None: - sys.exit("At least one of --plotFile and --outRawCounts are required.\n") + if not args.outRawCounts and not args.plotFile and not args.outCoverageMetrics: + sys.exit("At least one of --plotFile, --outRawCounts and --outCoverageMetrics are required.\n") + + if 'BED' in args: + bed_regions = args.BED + else: + bed_regions = None cr = countR.CountReadsPerBin(args.bamfiles, binLength=1, + bedFile=bed_regions, numberOfSamples=args.numberOfSamples, numberOfProcessors=args.numberOfProcessors, verbose=args.verbose, @@ -166,10 +195,22 @@ def main(args=None): samFlag_exclude=args.samFlagExclude, minFragmentLength=args.minFragmentLength, maxFragmentLength=args.maxFragmentLength, + bed_and_bin=True, out_file_for_raw_data=args.outRawCounts) num_reads_per_bin = cr.run() + if args.outCoverageMetrics and args.coverageThresholds: + args.coverageThresholds.sort() # Galaxy in particular tends to give things in a weird order + of = open(args.outCoverageMetrics, "w") + of.write("Sample\tThreshold\tPercent\n") + nbins = float(num_reads_per_bin.shape[0]) + for thresh in args.coverageThresholds: + vals = np.sum(num_reads_per_bin >= thresh, axis=0) + for lab, val in zip(args.labels, vals): + of.write("{}\t{}\t{:6.3f}\n".format(lab, thresh, 100. * val / nbins)) + of.close() + if args.outRawCounts: # append to the generated file the # labels diff --git a/galaxy/wrapper/deepTools_macros.xml b/galaxy/wrapper/deepTools_macros.xml index 13b8fe809d..51834b53d5 100644 --- a/galaxy/wrapper/deepTools_macros.xml +++ b/galaxy/wrapper/deepTools_macros.xml @@ -1,10 +1,10 @@ --numberOfProcessors "\${GALAXY_SLOTS:-4}" - 3.2.1.0 + 3.3.0.0 - deeptools + deeptools samtools diff --git a/galaxy/wrapper/plotCoverage.xml b/galaxy/wrapper/plotCoverage.xml index 60f8ef9d46..6fee4a5381 100644 --- a/galaxy/wrapper/plotCoverage.xml +++ b/galaxy/wrapper/plotCoverage.xml @@ -25,6 +25,23 @@ --outRawCounts '$outFileRawCounts' #end if + #if ' '.join(map(str, $BED)) != 'None': + #set bedFileLList=[] + #for $f in $BED: + #silent $bedFileList.append("'%s'" % $f) + #end for + #if $bedFileList != ["'None'"]: + --BED #echo ' '.join($bedFileList)# + #end if + #end if + + #if $coverageOpt.showCoverageOpt == "yes": + --outCoverageMetrics '$outFileCoverageMetrics' + #for $t in $coverageOpt.thresholds: + -ct $t.coverageThreshold + #end for + #end if + #if $advancedOpt.showAdvancedOpt == "yes": --numberOfSamples '$advancedOpt.numberOfSamples' $advancedOpt.skipZeros @@ -48,11 +65,28 @@ + + + + + + + + + + + + + + + - - + + @@ -78,6 +112,9 @@ outRawCounts is True + + coverageOpt.outCoverageMetrics is True + @@ -89,6 +126,18 @@ + + + + + + + + + + + +