Permalink
Browse files

Large commit

	1. Reset BED.strand to be string instead of char in the interest of generality.
	2. Fixed bug in genomeCoverageBed where NULL entries where genome summaries were err. reported with -bga.
	3. Fixed bug in closestBed where NULL entries were err. reported when BED > 6.
	4. Standardized the Open, Get, Close idiom per improvements from Gordon Assaf.
	5. Add "split" logic to bamToBed.
  • Loading branch information...
1 parent bda26ee commit d7e223614f8a962c914d14867f62fb5e3d7d027d @arq5x committed Jun 29, 2010
Showing with 532 additions and 680 deletions.
  1. 0 .cproject
  2. 0 .project
  3. 0 LICENSE
  4. 0 RELEASE_HISTORY
  5. 0 data/knownGene.hg18.chr21.bed
  6. 0 data/rmsk.hg18.chr21.bed
  7. 0 genomes/human.hg18.genome
  8. 0 genomes/human.hg19.genome
  9. 0 genomes/mouse.mm8.genome
  10. 0 genomes/mouse.mm9.genome
  11. +0 −45 src/bamFillMateSeq/Makefile
  12. +0 −294 src/bamFillMateSeq/bamFillMateSeq.cpp
  13. +2 −2 src/bamToBed/Makefile
  14. +53 −23 src/bamToBed/bamToBed.cpp
  15. +2 −2 src/bedToBam/Makefile
  16. +7 −4 src/bedToBam/bedToBam.cpp
  17. +5 −31 src/closestBed/closestBed.cpp
  18. +12 −15 src/complementBed/complementBed.cpp
  19. +1 −1 src/fastaFromBed/fastaFromBed.cpp
  20. +1 −1 src/genomeCoverageBed/genomeCoverageBed.cpp
  21. +1 −4 src/linksBed/linksBed.cpp
  22. +7 −6 src/mergeBed/mergeBed.cpp
  23. +4 −6 src/pairToBed/pairToBed.cpp
  24. +6 −6 src/pairToBed/pairToBed.h
  25. +23 −24 src/pairToPair/pairToPair.cpp
  26. +6 −6 src/pairToPair/pairToPair.h
  27. +67 −65 src/shuffleBed/shuffleBed.cpp
  28. +1 −1 src/slopBed/slopBed.cpp
  29. +4 −6 src/subtractBed/subtractBed.cpp
  30. 0 src/utils/BamTools/BGZF.cpp
  31. 0 src/utils/BamTools/BGZF.h
  32. +65 −0 src/utils/BamTools/BamAncillary.cpp
  33. +18 −0 src/utils/BamTools/BamAncillary.h
  34. 0 src/utils/BamTools/BamAux.h
  35. +3 −3 src/utils/BamTools/BamReader.cpp
  36. 0 src/utils/BamTools/BamReader.h
  37. 0 src/utils/BamTools/BamWriter.cpp
  38. 0 src/utils/BamTools/BamWriter.h
  39. +4 −1 src/utils/BamTools/Makefile
  40. +40 −56 src/utils/bedFile/bedFile.cpp
  41. +173 −49 src/utils/bedFile/bedFile.h
  42. +8 −8 src/utils/bedFilePE/bedFilePE.cpp
  43. +9 −9 src/utils/bedFilePE/bedFilePE.h
  44. 0 src/utils/gzstream/COPYING.LIB
  45. 0 src/utils/gzstream/Makefile
  46. 0 src/utils/gzstream/README
  47. 0 src/utils/gzstream/gzstream.C
  48. 0 src/utils/gzstream/gzstream.h
  49. BIN src/utils/gzstream/gzstream.o
  50. BIN src/utils/gzstream/test_gunzip.o
  51. BIN src/utils/gzstream/test_gzip.o
  52. 0 src/utils/gzstream/version
  53. 0 src/utils/version/version.h
  54. +9 −11 src/windowBed/windowBed.cpp
  55. +1 −1 src/windowBed/windowBed.h
View
0 .cproject 100644 → 100755
No changes.
View
0 .project 100644 → 100755
No changes.
View
0 LICENSE 100644 → 100755
No changes.
View
0 RELEASE_HISTORY 100644 → 100755
No changes.
View
0 data/knownGene.hg18.chr21.bed 100644 → 100755
No changes.
View
0 data/rmsk.hg18.chr21.bed 100644 → 100755
No changes.
View
0 genomes/human.hg18.genome 100644 → 100755
No changes.
View
0 genomes/human.hg19.genome 100644 → 100755
No changes.
View
0 genomes/mouse.mm8.genome 100644 → 100755
No changes.
View
0 genomes/mouse.mm9.genome 100644 → 100755
No changes.
@@ -1,45 +0,0 @@
-CXX= g++
-CXXFLAGS= -Wall -g
-LIBS= -lz
-UTILITIES_DIR = ../utils/
-OBJ_DIR = ../../obj/
-BIN_DIR = ../../bin/
-
-# -------------------
-# define our includes
-# -------------------
-INCLUDES = -I$(UTILITIES_DIR)/BamTools/ -I$(UTILITIES_DIR)/sequenceUtilities/ -I$(UTILITIES_DIR)/version/
-
-# ----------------------------------
-# define our source and object files
-# ----------------------------------
-SOURCES= bamFillMateSeq.cpp
-OBJECTS= $(SOURCES:.cpp=.o)
-_EXT_OBJECTS=BamReader.o BamWriter.o sequenceUtils.o BGZF.o
-EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
-BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
-PROGRAM= bamFillMateSeq
-
-
-all: $(PROGRAM)
-
-.PHONY: all
-
-
-$(PROGRAM): $(BUILT_OBJECTS) $(EXT_OBJECTS)
- @echo " * linking $(PROGRAM)"
- @$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $(BIN_DIR)/$@ $^ $(LIBS)
-
-$(BUILT_OBJECTS): $(SOURCES)
- @echo " * compiling" $(*F).cpp
- @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
-
-$(EXT_OBJECTS):
- @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/sequenceUtilities/
- @$(MAKE) --no-print-directory -C $(UTILITIES_DIR)/BamTools/
-
-clean:
- @echo "Cleaning up."
- @rm -f $(OBJ_DIR)/* $(BIN_DIR)/*
-
-.PHONY: clean
@@ -1,294 +0,0 @@
-/*****************************************************************************
- bamToBed.cpp
-
- (c) 2009 - Aaron Quinlan
- Hall Laboratory
- Department of Biochemistry and Molecular Genetics
- University of Virginia
- aaronquinlan@gmail.com
-
- Licenced under the GNU General Public License 2.0+ license.
-******************************************************************************/
-#include "version.h"
-#include "sequenceUtils.h"
-#include "BamReader.h"
-#include "BamWriter.h"
-#include "BamAux.h"
-using namespace BamTools;
-
-#include <vector>
-#include <iostream>
-#include <fstream>
-#include <stdlib.h>
-
-using namespace std;
-
-
-// define our program name
-#define PROGRAM_NAME "bamFillMateSeq"
-
-// define our parameter checking macro
-#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
-
-// function declarations
-void ShowHelp(void);
-void GetSeqsAndQuals(const vector<BamAlignment> &alignments,
- string &seq1,
- string &qual1,
- string &seq,
- string &qual2);
-void FillMateSeqForReadBlock (const vector<BamAlignment> &alignments,
- const RefVector &refs,
- BamWriter &writer,
- const string &seq1,
- const string &qual1,
- const string &seq,
- const string &qual2);
-int strnum_cmp(const char *a, const char *b);
-
-
-int main(int argc, char* argv[]) {
-
- // our configuration variables
- bool showHelp = false;
- bool haveBam = false;
-
- // input files
- string bamFile;
-
- // check to see if we should print out some help
- if(argc <= 1) showHelp = true;
-
- for(int i = 1; i < argc; i++) {
- int parameterLength = (int)strlen(argv[i]);
-
- if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
- (PARAMETER_CHECK("--help", 5, parameterLength))) {
- showHelp = true;
- }
- }
-
- if(showHelp) ShowHelp();
-
- // do some parsing (all of these parameters require 2 strings)
- for(int i = 1; i < argc; i++) {
-
- int parameterLength = (int)strlen(argv[i]);
-
- if(PARAMETER_CHECK("-i", 2, parameterLength)) {
- if ((i+1) < argc) {
- haveBam = true;
- bamFile = argv[i + 1];
- i++;
- }
- }
- else {
- cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
- showHelp = true;
- }
- }
-
- // make sure we have an input files
- if (!haveBam ) {
- cerr << endl << "*****" << endl << "*****ERROR: Need -i (BAM) file. " << endl << "*****" << endl;
- showHelp = true;
- }
-
- if (!showHelp) {
-
- // open the BAM file
- BamReader reader;
- reader.Open(bamFile);
- BamWriter writer;
-
- // get header & reference information
- string header = reader.GetHeaderText();
- RefVector refs = reader.GetReferenceData();
-
- // open our BAM writer
- writer.Open("stdout", header, refs);
-
- // track the previous and current sequence
- // names so that we can identify blocks of
- // alignments for a given read ID.
- string prevName = "";
- string currName = "";
-
- vector<BamAlignment> alignments; // vector of BAM alignments for a given ID in a BAM file.
- alignments.reserve(100);
-
- BamAlignment bam;
- string seq1, qual1, seq2, qual2;
- // get each set of alignments for each pair.
- while (reader.GetNextAlignment(bam)) {
-
- currName = bam.Name;
- //printf("%s\t%s\n", currName.c_str(), prevName.c_str());
- if ( (strcmp(currName.c_str(), prevName.c_str()) != 0) && (prevName != "") ) {
- //printf("yep\n");
- // block change. enforce expected sort order.
- if (strnum_cmp(currName.c_str(), prevName.c_str()) == -1) {
- cerr << "Error: The BAM file (" << bamFile << ") is not sorted by sequence name. Exiting!" << endl;
- exit (1);
- }
- else {
- // process the current block of alignments
- GetSeqsAndQuals(alignments, seq1, qual1, seq2, qual2);
- FillMateSeqForReadBlock(alignments, refs, writer, seq1, qual1, seq2, qual2);
-
- // reset the alignment vector for the next block
- // and add the first alignment in the next block
- alignments.clear();
- alignments.push_back(bam);
- }
- }
- else {
- alignments.push_back(bam);
- }
- prevName = currName;
- }
- // process the last block
- GetSeqsAndQuals(alignments, seq1, qual1, seq2, qual2);
- FillMateSeqForReadBlock(alignments, refs, writer, seq1, qual1, seq2, qual2);
-
- // close up
- reader.Close();
- writer.Close();
- }
- else {
- ShowHelp();
- }
-}
-
-
-void GetSeqsAndQuals(const vector<BamAlignment> &alignments,
- string &seq1,
- string &qual1,
- string &seq2,
- string &qual2) {
-
- bool haveSeq1, haveSeq2, haveQual1, haveQual2;
- haveSeq1 = haveSeq2 = haveQual1 = haveQual2 = false;
-
- // extract the sequences for the first and second mates
-
- vector<BamAlignment>::const_iterator alItr = alignments.begin();
- vector<BamAlignment>::const_iterator alEnd = alignments.end();
- while ( (alItr != alEnd) && (haveSeq1 == false || haveSeq2 == false ||
- haveQual1 == false || haveQual2 == false)) {
-
- if ( alItr->IsFirstMate() ) {
- seq1 = alItr->QueryBases;
- qual1 = alItr->Qualities;
- if ( alItr->IsReverseStrand() ) {
- reverseComplement(seq1);
- reverseSequence(qual1);
- }
- haveSeq1 = true;
- haveQual1 = true;
- }
- else if ( alItr->IsSecondMate() ) {
- seq2 = alItr->QueryBases;
- qual2 = alItr->Qualities;
- if ( alItr->IsReverseStrand() ) {
- reverseComplement(seq2);
- reverseSequence(qual2);
- }
- haveSeq2 = true;
- haveQual2 = true;
- }
- ++alItr;
- }
-
- //cerr << alignments.size() << endl;
- cout << alignments[0].Qualities << endl;
- //seq1 = alignments[0].QueryBases;
- //qual1 = alignments[0].Qualities;
- //seq2 = alignments[1].QueryBases;
- //qual2 = alignments[1].Qualities;
- seq1 = "seq1\0";
- seq2 = "seq2\0";
- qual1 = "88###########################################################################88";
- qual2 = "77###########################################################################77";
-
- // IT MUST have to do with modifying the alignment itself.
-}
-
-
-void FillMateSeqForReadBlock (const vector<BamAlignment> &alignments,
- const RefVector &refs,
- BamWriter &writer,
- const string &seq1,
- const string &qual1,
- const string &seq2,
- const string &qual2) {
- // now update each alignment with the appropriate
- // sequence for it's mate
- cerr << "call\n";
- string joe = qual1;
- vector<BamAlignment>::const_iterator aItr = alignments.begin();
- vector<BamAlignment>::const_iterator aEnd = alignments.end();
- int i = 0;
- for (; aItr != aEnd; ++aItr) {
-
- cerr << i << " " << aItr->Name << " [" << qual1 << "]" << "\t[" << qual2 << "]" << "\t" << joe << endl;
- //if ( aItr->IsFirstMate() ) {
- //aItr->AddBamTag("R2", "Z", seq2);
- //aItr->AddBamTag("Q2", "Z", qual2);
- //}
- //else if ( aItr->IsSecondMate() ) {
- //aItr->AddBamTag("R2", "Z", seq1);
- //aItr->AddBamTag("Q2", "Z", qual1);
- //}
- //BamAlignment poop = *aItr;
- writer.SaveAlignment(*aItr);
- cerr << i << " " << aItr->Name << " [" << qual1 << "]" << "\t[" << qual2 << "]" << "\t" << joe << endl << endl;
- i++;
- }
-}
-
-
-// samtools sort comparison function from bam_sort.c
-// used for sorting by query name.
-int strnum_cmp(const char *a, const char *b) {
- char *pa, *pb;
- pa = (char*)a; pb = (char*)b;
- while (*pa && *pb) {
- if (isdigit(*pa) && isdigit(*pb)) {
- long ai, bi;
- ai = strtol(pa, &pa, 10);
- bi = strtol(pb, &pb, 10);
- if (ai != bi) return ai<bi? -1 : ai>bi? 1 : 0;
- } else {
- if (*pa != *pb) break;
- ++pa; ++pb;
- }
- }
- if (*pa == *pb)
- return (pa-a) < (pb-b)? -1 : (pa-a) > (pb-b)? 1 : 0;
- return *pa<*pb? -1 : *pa>*pb? 1 : 0;
-}
-
-
-void ShowHelp(void) {
-
- cerr << endl << "Program: " << PROGRAM_NAME << " (v" << VERSION << ")" << endl;
-
- cerr << "Author: Aaron Quinlan (aaronquinlan@gmail.com)" << endl;
-
- cerr << "Summary: Updates the R2 and Q2 BAM tag to include the mate's seq/qual." << endl << endl;
-
- cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -i <bam> " << endl << endl;
-
-// cerr << "Options: " << endl;
-
-// cerr << "\t-bedpe\t" << "Write BEDPE format." << endl << endl;
-
- cerr << "Notes: " << endl;
-
- cerr << "\tInput BAM must be sorted by read name" << endl << endl;
-
- // end the program here
- exit(1);
-}
-
View
@@ -8,14 +8,14 @@ BIN_DIR = ../../bin/
# -------------------
# define our includes
# -------------------
-INCLUDES = -I$(UTILITIES_DIR)/BamTools/ -I$(UTILITIES_DIR)/version/
+INCLUDES = -I$(UTILITIES_DIR)/bedFile/ -I$(UTILITIES_DIR)/BamTools/ -I$(UTILITIES_DIR)/version/ -I$(UTILITIES_DIR)/gzstream/
# ----------------------------------
# define our source and object files
# ----------------------------------
SOURCES= bamToBed.cpp
OBJECTS= $(SOURCES:.cpp=.o)
-_EXT_OBJECTS=BamReader.o BGZF.o
+_EXT_OBJECTS=BamReader.o BamAncillary.o BGZF.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
PROGRAM= bamToBed
Oops, something went wrong.

0 comments on commit d7e2236

Please sign in to comment.