Skip to content

Commit

Permalink
Merge pull request #5 from griffithlab/develop
Browse files Browse the repository at this point in the history
Merge in changes from develop.
  • Loading branch information
gatoravi committed Oct 26, 2015
2 parents f9d9f22 + 01926fb commit 6ee65a7
Show file tree
Hide file tree
Showing 22 changed files with 1,005 additions and 71 deletions.
14 changes: 10 additions & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,20 @@ compiler:
- gcc

before_script:
- mkdir build
- cd build
- cmake ..
- make
- if [ "$TRAVIS_OS_NAME" == "osx" ]; then (brew update && brew install lcov); fi
- gem install coveralls-lcov

script:
- mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=coverage && make
- ctest -V

after_success:
- cd build
- lcov -c -d . -o cov.txt
- lcov -e cov.txt "*/regtools/src/*" -o coverage.run.filtered.1
- lcov -r coverage.run.filtered.1 "*src/utils/*" "*/tests/*" -o coverage.run.filtered
- coveralls-lcov coverage.run.filtered

os:
- linux
- osx
Expand Down
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,11 @@ endif()

add_subdirectory ("${PROJECT_SOURCE_DIR}/src/junctions/")
add_subdirectory ("${PROJECT_SOURCE_DIR}/src/gtf/")
add_subdirectory ("${PROJECT_SOURCE_DIR}/src/variants/")

#The main executable
add_executable (regtools src/regtools.cc)
target_link_libraries (regtools junctions bedtools gtf htslib)
target_link_libraries (regtools junctions variants bedtools gtf htslib)

#Testing
enable_testing()
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
[![Build Status](https://travis-ci.org/griffithlab/regtools.svg?branch=master)](https://travis-ci.org/griffithlab/regtools)
[![Documentation Status](https://readthedocs.org/projects/regtools/badge/?version=latest)](https://readthedocs.org/projects/regtools/?badge=latest)
[![Coverage Status](https://coveralls.io/repos/griffithlab/regtools/badge.svg?branch=master&service=github)](https://coveralls.io/github/griffithlab/regtools?branch=master)

#regtools

Expand Down
6 changes: 5 additions & 1 deletion src/gtf/gtf_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ DEALINGS IN THE SOFTWARE. */
#include <fstream>
#include <iostream>
#include <set>
#include <stdexcept>
#include <vector>
#include <algorithm>
#include "common.h"
Expand Down Expand Up @@ -66,7 +67,10 @@ Gtf GtfParser::parse_exon_line(string line) {
Gtf gtf1;
vector<string> fields;
Tokenize(line, fields);
assert(fields.size() == 9);
if(fields.size() != 9) {
cerr << line << endl << fields.size();
throw runtime_error("Expected 9 fields in GTF line.");
}
if(fields[2] != "exon") {
gtf1.is_exon = false;
return gtf1;
Expand Down
62 changes: 14 additions & 48 deletions src/junctions/junctions_annotator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,18 +42,14 @@ void JunctionsAnnotator::close_ofstream() {
//If output file is empty, set to cout
void JunctionsAnnotator::set_ofstream_object(ofstream &out) {
if(output_file_ == "NA") {
out.copyfmt(cout);
out.basic_ios<char>::rdbuf(cout.rdbuf());
out.clear(cout.rdstate());
copy_stream(cout, out);
return;
}
ofs_.open(output_file_.c_str());
if(!ofs_.is_open())
throw runtime_error("Unable to open " +
output_file_);
out.copyfmt(ofs_);
out.basic_ios<char>::rdbuf(ofs_.rdbuf());
out.clear(ofs_.rdstate());
copy_stream(ofs_, out);
}

//Open junctions file
Expand Down Expand Up @@ -129,7 +125,7 @@ bool JunctionsAnnotator::read_gtf() {
}

//Find overlap between transcript and junction on the negative strand,
//function returns true if this is a known junction in the transcript
//function returns true if either the acceptor or the donor is known
bool JunctionsAnnotator::overlap_ps(const vector<BED>& exons,
AnnotatedJunction & junction) {
//skip single exon genes
Expand All @@ -141,158 +137,132 @@ bool JunctionsAnnotator::overlap_ps(const vector<BED>& exons,
exons[exons.size() - 1].end < junction.start)
return false;
for(std::size_t i = 0; i < exons.size(); i++) {
cerr << endl << "exon number " << i << "\t" << exons[i].start
<< "\t" << exons[i].end;
if(exons[i].start > junction.end) {
cerr << endl << "-1";
//No need to look any further
//the rest of the exons are outside the junction
break;
}
//known junction
if(exons[i].end == junction.start &&
exons[i + 1].start == junction.end) {
cerr << endl << "DA";
junction.anchor = "DA";
junction.known_acceptor = true;
junction.known_donor = true;
junction.known_junction = true;
known_junction = true;
}
else {
cerr << endl << "0";
if(!junction_start) {
if(exons[i].end >= junction.start) {
junction_start = true;
cerr << endl << "1";
}
}
if(junction_start) {
if(exons[i].start > junction.start &&
exons[i].end < junction.end) {
cerr << endl << "2";
junction.exons_skipped.insert(exons[i].name);
}
if(exons[i].start > junction.start) {
cerr << endl << "3";
junction.donors_skipped.insert(exons[i].start);
}
if(exons[i].end < junction.end) {
cerr << endl << "4";
junction.acceptors_skipped.insert(exons[i].end);
}
if(exons[i].end == junction.start) {
cerr << endl << "5";
junction.known_donor = true;
}
//TODO - check for last exon
if(exons[i].start == junction.end) {
cerr << endl << "6";
junction.known_acceptor = true;
}
}
}
}
annotate_anchor(junction);
return known_junction;
return (junction.anchor != "N");
}

//Find overlap between transcript and junction on the negative strand,
//function returns true if this is a known junction in the transcript
//function returns true if either the acceptor or the donor is known
bool JunctionsAnnotator::overlap_ns(const vector<BED> & exons,
AnnotatedJunction & junction) {
cerr << endl << "in negative overlap";
//skip single exon genes
if(skip_single_exon_genes_ && exons.size() == 1) return false;
bool junction_start = false;
bool known_junction = false;
//check if transcript overlaps with junction
if(exons[0].end < junction.start ||
exons[exons.size() - 1].start > junction.end) {
cerr << endl << "transcript outside junction";
return known_junction;
}
for(std::size_t i = 0; i < exons.size(); i++) {
if(exons[i].end < junction.start) {
cerr << endl << "-1";
//No need to look any further
//the rest of the exons are outside the junction
break;
}
//Check if this is a known junction
if(exons[i].start == junction.end &&
exons[i + 1].end == junction.start) {
cerr << endl << "DA";
junction.anchor = "DA";
junction.known_acceptor = true;
junction.known_donor = true;
junction.known_junction = true;
known_junction = true;
}
else {
cerr << endl << "0\t" << i;
if(!junction_start) {
if(exons[i].start <= junction.end) {
cerr << endl << "1";
junction_start = true;
}
}
if(junction_start) {
if(exons[i].start > junction.start &&
exons[i].end < junction.end) {
cerr << endl << "2";
junction.exons_skipped.insert(exons[i].name);
}
if(exons[i].start > junction.start) {
cerr << endl << "3";
junction.donors_skipped.insert(exons[i].start);
}
if(exons[i].end < junction.end) {
cerr << endl << "4";
junction.acceptors_skipped.insert(exons[i].end);
}
if(exons[i].start == junction.end) {
cerr << endl << "5";
junction.known_donor = true;
}
if(exons[i].end == junction.start) {
cerr << endl << "6";
junction.known_acceptor = true;
}
}
}
}
annotate_anchor(junction);
return known_junction;
return (junction.anchor != "N");
}

//Annotate the anchor
//Annotate the anchor i.e is this a known/novel donor-acceptor pair
void JunctionsAnnotator::annotate_anchor(AnnotatedJunction & junction) {
//check if known junction
if(!junction.known_junction) {
junction.anchor = string("N");
if(junction.known_junction) {
junction.anchor = "DA";
} else {
if(junction.known_donor)
if(junction.known_acceptor)
junction.anchor = string("NDA");
else
junction.anchor = string("D");
else if(junction.known_acceptor)
junction.anchor = string("A");
else
junction.anchor = string("N");
}
}

//Check for overlap between a transcript and junctions
//Check if the junction we saw is a known junction
//Calculate exons_skipped, donors_skipped, acceptors_skipped
void JunctionsAnnotator::check_for_overlap(string transcript_id, AnnotatedJunction & junction) {
cerr << "\nTranscript id: " << transcript_id;
const vector<BED> & exons =
gtf_.get_exons_from_transcript(transcript_id);
if(!exons.size()) {
cerr << "Unexpected error. No exons for transcript "
<< transcript_id;
exit(1);
throw runtime_error("Unexpected error. No exons for transcript "
+ transcript_id);
}
string transcript_strand = exons[0].strand;
//Make sure the strands of the junction and transcript match
Expand All @@ -312,8 +282,7 @@ void JunctionsAnnotator::check_for_overlap(string transcript_id, AnnotatedJuncti
gtf_.get_gene_from_transcript(transcript_id));
}
} else {
cerr << "\nUnknown strand " << junction.strand;
exit(1);
throw runtime_error("\nUnknown strand " + junction.strand);
}
cerr << "\n\tDonors skipped " << junction.donors_skipped.size();
cerr << "\n\tExons skipped " << junction.exons_skipped.size();
Expand All @@ -324,12 +293,10 @@ void JunctionsAnnotator::check_for_overlap(string transcript_id, AnnotatedJuncti
//Annotate with gtf
//Takes a single junction BED and annotates with GTF
void JunctionsAnnotator::annotate_junction_with_gtf(AnnotatedJunction & j1) {
BIN junction_bin = getBin(j1.start, j1.end);
//From BedTools
BIN start_bin, end_bin;
start_bin = (j1.start >> _binFirstShift);
end_bin = ((j1.end-1) >> _binFirstShift);
cerr << endl << "junction_bin " << junction_bin;
//We loop through each UCSC BIN level for feature A's chrom.
//For each BIN, we loop through each B feature and add it to
// hits if it meets all of the user's requests.
Expand Down Expand Up @@ -379,7 +346,6 @@ int JunctionsAnnotator::parse_options(int argc, char *argv[]) {
break;
default:
usage();
cerr << c << " here1";
throw runtime_error("\nError parsing inputs!");
}
}
Expand Down
23 changes: 23 additions & 0 deletions src/junctions/junctions_annotator.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ struct AnnotatedJunction : BED {
bool known_acceptor;
//Is this a known junction
bool known_junction;
//Annotation - Exonic/Intronic etc.
string annotation;
//Print the header line
static void print_header(ostream& out) {
out << "chrom" << "\t" << "start" <<
Expand Down Expand Up @@ -103,6 +105,7 @@ struct AnnotatedJunction : BED {
//Clear the contents of the junction
void reset() {
anchor = string("N");
annotation = "";
splice_site = "";
known_donor = false;
known_acceptor = false;
Expand All @@ -113,8 +116,28 @@ struct AnnotatedJunction : BED {
transcripts_overlap.clear();
genes_overlap.clear();
}
//constructor
AnnotatedJunction() {
reset();
}
//constructor
AnnotatedJunction(string chr1,
uint32_t start1,
uint32_t end1) {
chrom = chr1;
start = start1;
end = end1;
}
};

//Copy one stream object into another
inline
void copy_stream(const ostream &source, ostream &dest) {
dest.copyfmt(source);
dest.basic_ios<char>::rdbuf(source.rdbuf());
dest.clear(source.rdstate());
}

//The class that does all the annotation
//Uses a GTF parser object to annotate a junction.
class JunctionsAnnotator {
Expand Down
8 changes: 4 additions & 4 deletions src/junctions/junctions_creator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -324,13 +324,13 @@ int JunctionsCreator::identify_junctions_from_BAM() {
//open BAM for reading
samFile *in = sam_open(bam_.c_str(), "r");
if(in == NULL) {
return 1;
throw runtime_error("Unable to open BAM/SAM file.");
}
//Load the index
hts_idx_t *idx = sam_index_load(in, bam_.c_str());
if(idx == NULL) {
cerr << "Unable to load alignment index. Please index with Samtools.";
return -1;
throw runtime_error("Unable to open BAM/SAM index."
" Make sure alignments are indexed");
}
//Get the header
bam_hdr_t *header = sam_hdr_read(in);
Expand All @@ -340,7 +340,7 @@ int JunctionsCreator::identify_junctions_from_BAM() {
iter = sam_itr_querys(idx, header, region_.c_str());
if(header == NULL || iter == NULL) {
sam_close(in);
return 1;
throw runtime_error("Unable to iterate to region within BAM.");
}
//Initiate the alignment record
bam1_t *aln = bam_init1();
Expand Down
7 changes: 2 additions & 5 deletions src/junctions/junctions_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,13 @@ int junctions_create(int argc, char *argv[]) {
JunctionsCreator create;
try {
create.parse_options(argc, argv);
create.identify_junctions_from_BAM();
create.print_all_junctions();
} catch(const runtime_error& error) {
cerr << error.what();
create.usage();
return 1;
}
if(create.identify_junctions_from_BAM()) {
create.usage();
return 1;
}
create.print_all_junctions();
return 0;
}

Expand Down

0 comments on commit 6ee65a7

Please sign in to comment.