Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge in changes from develop. #5

Merged
merged 36 commits into from
Oct 26, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
42a9ec3
Add coveralls to Travis YML
Oct 2, 2015
2005d46
Try without pyenv
Oct 2, 2015
b9928f7
Add coveralls badge
Oct 2, 2015
5506504
Add coverage flag to cmake
Oct 2, 2015
e1bce97
Factor out common into function
Oct 5, 2015
524b7d7
Annotate NDA, D, A with transcripts/genes
Oct 6, 2015
a0e20ab
Throw exceptions instead of return 1
Oct 7, 2015
e10f317
Get rid of debug statements.
Oct 8, 2015
f73c6db
Switch to exceptions.
Oct 8, 2015
eb663e5
Get rid of unused variable
Oct 8, 2015
ab26b77
Add variants annotate command
Oct 12, 2015
145ee5a
Make position unsigned
Oct 12, 2015
cf36b01
Add annotation field
Oct 13, 2015
583b197
Add constructors
Oct 13, 2015
0aac4ce
Add newline after params
Oct 13, 2015
9dce4ea
Rename location to annotation
Oct 13, 2015
559e34c
Rewrite overlap function with struct
Oct 13, 2015
efd4eb3
Fix include guard
Oct 13, 2015
612a6d1
Update overlap method declaration
Oct 13, 2015
ec8b23f
Update include paths
Oct 13, 2015
b6a5ca0
Add test for variants-annotate
Oct 13, 2015
75b5d4b
Move declaration outside loop.
Oct 15, 2015
e539d14
Update distance calculations.
Oct 15, 2015
23a0fd1
Remove debug annotation.
Oct 15, 2015
fcc376b
Update expected output.
Oct 15, 2015
d2943ce
Throw exception instead of assert.
Oct 15, 2015
11ced1d
Update distance calculations.
Oct 16, 2015
a934aa4
Update tests
Oct 16, 2015
0d54403
Handle transcripts on - strand correctly
Oct 16, 2015
a0b6cb7
Test for variants on a -ve strand transcript
Oct 16, 2015
449cdb7
Add option to skip single exon transcripts.
Oct 22, 2015
749cbf7
Add stderr to test message
Oct 22, 2015
3feeba5
Reorder program options.
gatoravi Oct 22, 2015
0e21191
Try to get coveralls working
gatoravi Oct 26, 2015
9010917
Try to fix brew issue on Linux workers.
gatoravi Oct 26, 2015
01926fb
use \$TRAVIS_OS_NAME
gatoravi Oct 26, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
14 changes: 10 additions & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,20 @@ compiler:
- gcc

before_script:
- mkdir build
- cd build
- cmake ..
- make
- if [ "$TRAVIS_OS_NAME" == "osx" ]; then (brew update && brew install lcov); fi
- gem install coveralls-lcov

script:
- mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=coverage && make
- ctest -V

after_success:
- cd build
- lcov -c -d . -o cov.txt
- lcov -e cov.txt "*/regtools/src/*" -o coverage.run.filtered.1
- lcov -r coverage.run.filtered.1 "*src/utils/*" "*/tests/*" -o coverage.run.filtered
- coveralls-lcov coverage.run.filtered

os:
- linux
- osx
Expand Down
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,11 @@ endif()

add_subdirectory ("${PROJECT_SOURCE_DIR}/src/junctions/")
add_subdirectory ("${PROJECT_SOURCE_DIR}/src/gtf/")
add_subdirectory ("${PROJECT_SOURCE_DIR}/src/variants/")

#The main executable
add_executable (regtools src/regtools.cc)
target_link_libraries (regtools junctions bedtools gtf htslib)
target_link_libraries (regtools junctions variants bedtools gtf htslib)

#Testing
enable_testing()
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
[![Build Status](https://travis-ci.org/griffithlab/regtools.svg?branch=master)](https://travis-ci.org/griffithlab/regtools)
[![Documentation Status](https://readthedocs.org/projects/regtools/badge/?version=latest)](https://readthedocs.org/projects/regtools/?badge=latest)
[![Coverage Status](https://coveralls.io/repos/griffithlab/regtools/badge.svg?branch=master&service=github)](https://coveralls.io/github/griffithlab/regtools?branch=master)

#regtools

Expand Down
6 changes: 5 additions & 1 deletion src/gtf/gtf_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ DEALINGS IN THE SOFTWARE. */
#include <fstream>
#include <iostream>
#include <set>
#include <stdexcept>
#include <vector>
#include <algorithm>
#include "common.h"
Expand Down Expand Up @@ -66,7 +67,10 @@ Gtf GtfParser::parse_exon_line(string line) {
Gtf gtf1;
vector<string> fields;
Tokenize(line, fields);
assert(fields.size() == 9);
if(fields.size() != 9) {
cerr << line << endl << fields.size();
throw runtime_error("Expected 9 fields in GTF line.");
}
if(fields[2] != "exon") {
gtf1.is_exon = false;
return gtf1;
Expand Down
62 changes: 14 additions & 48 deletions src/junctions/junctions_annotator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,18 +42,14 @@ void JunctionsAnnotator::close_ofstream() {
//If output file is empty, set to cout
void JunctionsAnnotator::set_ofstream_object(ofstream &out) {
if(output_file_ == "NA") {
out.copyfmt(cout);
out.basic_ios<char>::rdbuf(cout.rdbuf());
out.clear(cout.rdstate());
copy_stream(cout, out);
return;
}
ofs_.open(output_file_.c_str());
if(!ofs_.is_open())
throw runtime_error("Unable to open " +
output_file_);
out.copyfmt(ofs_);
out.basic_ios<char>::rdbuf(ofs_.rdbuf());
out.clear(ofs_.rdstate());
copy_stream(ofs_, out);
}

//Open junctions file
Expand Down Expand Up @@ -129,7 +125,7 @@ bool JunctionsAnnotator::read_gtf() {
}

//Find overlap between transcript and junction on the negative strand,
//function returns true if this is a known junction in the transcript
//function returns true if either the acceptor or the donor is known
bool JunctionsAnnotator::overlap_ps(const vector<BED>& exons,
AnnotatedJunction & junction) {
//skip single exon genes
Expand All @@ -141,158 +137,132 @@ bool JunctionsAnnotator::overlap_ps(const vector<BED>& exons,
exons[exons.size() - 1].end < junction.start)
return false;
for(std::size_t i = 0; i < exons.size(); i++) {
cerr << endl << "exon number " << i << "\t" << exons[i].start
<< "\t" << exons[i].end;
if(exons[i].start > junction.end) {
cerr << endl << "-1";
//No need to look any further
//the rest of the exons are outside the junction
break;
}
//known junction
if(exons[i].end == junction.start &&
exons[i + 1].start == junction.end) {
cerr << endl << "DA";
junction.anchor = "DA";
junction.known_acceptor = true;
junction.known_donor = true;
junction.known_junction = true;
known_junction = true;
}
else {
cerr << endl << "0";
if(!junction_start) {
if(exons[i].end >= junction.start) {
junction_start = true;
cerr << endl << "1";
}
}
if(junction_start) {
if(exons[i].start > junction.start &&
exons[i].end < junction.end) {
cerr << endl << "2";
junction.exons_skipped.insert(exons[i].name);
}
if(exons[i].start > junction.start) {
cerr << endl << "3";
junction.donors_skipped.insert(exons[i].start);
}
if(exons[i].end < junction.end) {
cerr << endl << "4";
junction.acceptors_skipped.insert(exons[i].end);
}
if(exons[i].end == junction.start) {
cerr << endl << "5";
junction.known_donor = true;
}
//TODO - check for last exon
if(exons[i].start == junction.end) {
cerr << endl << "6";
junction.known_acceptor = true;
}
}
}
}
annotate_anchor(junction);
return known_junction;
return (junction.anchor != "N");
}

//Find overlap between transcript and junction on the negative strand,
//function returns true if this is a known junction in the transcript
//function returns true if either the acceptor or the donor is known
bool JunctionsAnnotator::overlap_ns(const vector<BED> & exons,
AnnotatedJunction & junction) {
cerr << endl << "in negative overlap";
//skip single exon genes
if(skip_single_exon_genes_ && exons.size() == 1) return false;
bool junction_start = false;
bool known_junction = false;
//check if transcript overlaps with junction
if(exons[0].end < junction.start ||
exons[exons.size() - 1].start > junction.end) {
cerr << endl << "transcript outside junction";
return known_junction;
}
for(std::size_t i = 0; i < exons.size(); i++) {
if(exons[i].end < junction.start) {
cerr << endl << "-1";
//No need to look any further
//the rest of the exons are outside the junction
break;
}
//Check if this is a known junction
if(exons[i].start == junction.end &&
exons[i + 1].end == junction.start) {
cerr << endl << "DA";
junction.anchor = "DA";
junction.known_acceptor = true;
junction.known_donor = true;
junction.known_junction = true;
known_junction = true;
}
else {
cerr << endl << "0\t" << i;
if(!junction_start) {
if(exons[i].start <= junction.end) {
cerr << endl << "1";
junction_start = true;
}
}
if(junction_start) {
if(exons[i].start > junction.start &&
exons[i].end < junction.end) {
cerr << endl << "2";
junction.exons_skipped.insert(exons[i].name);
}
if(exons[i].start > junction.start) {
cerr << endl << "3";
junction.donors_skipped.insert(exons[i].start);
}
if(exons[i].end < junction.end) {
cerr << endl << "4";
junction.acceptors_skipped.insert(exons[i].end);
}
if(exons[i].start == junction.end) {
cerr << endl << "5";
junction.known_donor = true;
}
if(exons[i].end == junction.start) {
cerr << endl << "6";
junction.known_acceptor = true;
}
}
}
}
annotate_anchor(junction);
return known_junction;
return (junction.anchor != "N");
}

//Annotate the anchor
//Annotate the anchor i.e is this a known/novel donor-acceptor pair
void JunctionsAnnotator::annotate_anchor(AnnotatedJunction & junction) {
//check if known junction
if(!junction.known_junction) {
junction.anchor = string("N");
if(junction.known_junction) {
junction.anchor = "DA";
} else {
if(junction.known_donor)
if(junction.known_acceptor)
junction.anchor = string("NDA");
else
junction.anchor = string("D");
else if(junction.known_acceptor)
junction.anchor = string("A");
else
junction.anchor = string("N");
}
}

//Check for overlap between a transcript and junctions
//Check if the junction we saw is a known junction
//Calculate exons_skipped, donors_skipped, acceptors_skipped
void JunctionsAnnotator::check_for_overlap(string transcript_id, AnnotatedJunction & junction) {
cerr << "\nTranscript id: " << transcript_id;
const vector<BED> & exons =
gtf_.get_exons_from_transcript(transcript_id);
if(!exons.size()) {
cerr << "Unexpected error. No exons for transcript "
<< transcript_id;
exit(1);
throw runtime_error("Unexpected error. No exons for transcript "
+ transcript_id);
}
string transcript_strand = exons[0].strand;
//Make sure the strands of the junction and transcript match
Expand All @@ -312,8 +282,7 @@ void JunctionsAnnotator::check_for_overlap(string transcript_id, AnnotatedJuncti
gtf_.get_gene_from_transcript(transcript_id));
}
} else {
cerr << "\nUnknown strand " << junction.strand;
exit(1);
throw runtime_error("\nUnknown strand " + junction.strand);
}
cerr << "\n\tDonors skipped " << junction.donors_skipped.size();
cerr << "\n\tExons skipped " << junction.exons_skipped.size();
Expand All @@ -324,12 +293,10 @@ void JunctionsAnnotator::check_for_overlap(string transcript_id, AnnotatedJuncti
//Annotate with gtf
//Takes a single junction BED and annotates with GTF
void JunctionsAnnotator::annotate_junction_with_gtf(AnnotatedJunction & j1) {
BIN junction_bin = getBin(j1.start, j1.end);
//From BedTools
BIN start_bin, end_bin;
start_bin = (j1.start >> _binFirstShift);
end_bin = ((j1.end-1) >> _binFirstShift);
cerr << endl << "junction_bin " << junction_bin;
//We loop through each UCSC BIN level for feature A's chrom.
//For each BIN, we loop through each B feature and add it to
// hits if it meets all of the user's requests.
Expand Down Expand Up @@ -379,7 +346,6 @@ int JunctionsAnnotator::parse_options(int argc, char *argv[]) {
break;
default:
usage();
cerr << c << " here1";
throw runtime_error("\nError parsing inputs!");
}
}
Expand Down
23 changes: 23 additions & 0 deletions src/junctions/junctions_annotator.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ struct AnnotatedJunction : BED {
bool known_acceptor;
//Is this a known junction
bool known_junction;
//Annotation - Exonic/Intronic etc.
string annotation;
//Print the header line
static void print_header(ostream& out) {
out << "chrom" << "\t" << "start" <<
Expand Down Expand Up @@ -103,6 +105,7 @@ struct AnnotatedJunction : BED {
//Clear the contents of the junction
void reset() {
anchor = string("N");
annotation = "";
splice_site = "";
known_donor = false;
known_acceptor = false;
Expand All @@ -113,8 +116,28 @@ struct AnnotatedJunction : BED {
transcripts_overlap.clear();
genes_overlap.clear();
}
//constructor
AnnotatedJunction() {
reset();
}
//constructor
AnnotatedJunction(string chr1,
uint32_t start1,
uint32_t end1) {
chrom = chr1;
start = start1;
end = end1;
}
};

//Copy one stream object into another
inline
void copy_stream(const ostream &source, ostream &dest) {
dest.copyfmt(source);
dest.basic_ios<char>::rdbuf(source.rdbuf());
dest.clear(source.rdstate());
}

//The class that does all the annotation
//Uses a GTF parser object to annotate a junction.
class JunctionsAnnotator {
Expand Down
8 changes: 4 additions & 4 deletions src/junctions/junctions_creator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -324,13 +324,13 @@ int JunctionsCreator::identify_junctions_from_BAM() {
//open BAM for reading
samFile *in = sam_open(bam_.c_str(), "r");
if(in == NULL) {
return 1;
throw runtime_error("Unable to open BAM/SAM file.");
}
//Load the index
hts_idx_t *idx = sam_index_load(in, bam_.c_str());
if(idx == NULL) {
cerr << "Unable to load alignment index. Please index with Samtools.";
return -1;
throw runtime_error("Unable to open BAM/SAM index."
" Make sure alignments are indexed");
}
//Get the header
bam_hdr_t *header = sam_hdr_read(in);
Expand All @@ -340,7 +340,7 @@ int JunctionsCreator::identify_junctions_from_BAM() {
iter = sam_itr_querys(idx, header, region_.c_str());
if(header == NULL || iter == NULL) {
sam_close(in);
return 1;
throw runtime_error("Unable to iterate to region within BAM.");
}
//Initiate the alignment record
bam1_t *aln = bam_init1();
Expand Down
7 changes: 2 additions & 5 deletions src/junctions/junctions_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,13 @@ int junctions_create(int argc, char *argv[]) {
JunctionsCreator create;
try {
create.parse_options(argc, argv);
create.identify_junctions_from_BAM();
create.print_all_junctions();
} catch(const runtime_error& error) {
cerr << error.what();
create.usage();
return 1;
}
if(create.identify_junctions_from_BAM()) {
create.usage();
return 1;
}
create.print_all_junctions();
return 0;
}

Expand Down