Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

conserved 4d site extraction

Also reading frame preservation, fixes, and tests
  • Loading branch information...
commit cc85a6a60d680ecb5cccd681ff7283d7dd6f9cd5 1 parent ab5d998
@joelarmstrong joelarmstrong authored
View
10 extract/Makefile
@@ -1,7 +1,12 @@
rootPath = ../
include ../include.mk
-all : ${binPath}/halExtract ${binPath}/halAlignedExtract ${binPath}/halMaskExtract ${binPath}/hal4dExtract
+libTestSources = $(wildcard tests/*.cpp)
+libTestHeaders = $(wildcard tests/*.h)
+libTestsCommon = ${rootPath}/api/tests/halAlignmentTest.cpp ${rootPath}/api/tests/halAlignmentInstanceTest.cpp
+libTestsCommonHeaders = ${rootPath}/api/tests/halAlignmentTest.h ${rootPath}/api/tests/halAlignmentInstanceTest.h ${rootPath}/api/tests/allTests.h
+
+all : ${binPath}/halExtract ${binPath}/halAlignedExtract ${binPath}/halMaskExtract ${binPath}/hal4dExtract ${binPath}/hal4dExtractTest
clean :
rm -f ${binPath}/halExtract ${binPath}/halAlignedExtract ${binPath}/halMaskExtract ${binPath}/hal4dExtract
@@ -17,3 +22,6 @@ ${binPath}/halMaskExtract : impl/halMaskExtractMain.cpp impl/halMaskExtractor.c
${binPath}/hal4dExtract : impl/hal4dExtractMain.cpp impl/hal4dExtract.cpp inc/hal4dExtract.h ${libPath}/halLiftover.a ${libPath}/halLib.a ${basicLibsDependencies}
${cpp} ${cppflags} -I inc -I impl -I ${libPath} -I impl -I tests -o ${binPath}/hal4dExtract impl/hal4dExtractMain.cpp impl/hal4dExtract.cpp ${libPath}/halLiftover.a ${libPath}/halLib.a ${basicLibs}
+
+${binPath}/hal4dExtractTest : impl/hal4dExtract.cpp tests/hal4dExtractTest.cpp ${libTestSources} ${libTestHeaders} ${libTestsCommon} ${libTestsHeadersCommon} ${libPath}/halLiftover.a ${libPath}/halLib.a ${basicLibsDependencies}
+ ${cpp} ${cppflags} -I inc -I impl -I ${libPath} -I tests -I ../api/tests -o ${binPath}/hal4dExtractTest impl/hal4dExtract.cpp ${libTestsCommon} tests/hal4dExtractTest.cpp ${libPath}/halLiftover.a ${libPath}/halLib.a ${basicLibs}
View
268 extract/impl/hal4dExtract.cpp
@@ -25,7 +25,7 @@ Extract4d::~Extract4d()
void Extract4d::run(const Genome* refGenome,
istream* inBedStream, ostream* outBedStream,
- int bedVersion)
+ int bedVersion, bool conserved)
{
_refGenome = refGenome;
_outBedStream = outBedStream;
@@ -34,6 +34,7 @@ void Extract4d::run(const Genome* refGenome,
{
_bedVersion = getBedVersion(inBedStream);
}
+ _conserved = conserved;
scan(inBedStream, _bedVersion);
}
@@ -52,12 +53,20 @@ void Extract4d::visitLine()
}
else
{
- extractBed4d();
+ if (_conserved) {
+ extractConservedBed4d();
+ } else {
+ extractBed4d();
+ }
}
}
else
{
- extractBlocks4d();
+ if (_conserved) {
+ extractConservedBlocks4d();
+ } else {
+ extractBlocks4d();
+ }
}
}
else if (_refSequence == NULL)
@@ -69,6 +78,9 @@ void Extract4d::visitLine()
write();
}
+// why keep a unconserved version around?: the column iterator seems
+// to fail in the case of a single root genome. Not a common use case
+// but it is used in the tests.
void Extract4d::extractBed4d()
{
char c1;
@@ -115,16 +127,212 @@ void Extract4d::extractBed4d()
}
}
-void Extract4d::extractBlocks4d()
+void Extract4d::extractConservedBed4d()
+{
+ hal_index_t start = _bedLine._start;
+ hal_index_t end = _bedLine._end;
+ hal_size_t len = end - start;
+ hal_size_t N = len/3;
+ --end;
+ ColumnIteratorConstPtr colIt = _refSequence->getColumnIterator(NULL, 0, start, NULL_INDEX, false, true, _bedLine._strand == '-');
+
+ for (hal_size_t i = 0; i < N; ++i)
+ {
+ bool is4dSite = true;
+ if (_bedLine._strand == '+') {
+ // shift to 4d site location
+ colIt->toRight();
+ colIt->toRight();
+ }
+ const ColumnIterator::ColumnMap *colMap = colIt->getColumnMap();
+ for (ColumnIterator::ColumnMap::const_iterator colMapIt = colMap->begin();
+ colMapIt != colMap->end(); ++colMapIt) {
+ const ColumnIterator::DNASet *dnaSet = colMapIt->second;
+ for (hal_size_t j = 0; j < dnaSet->size(); j++) {
+ char c1, c2;
+ DNAIteratorConstPtr dna = dnaSet->at(j);
+ if ((dna->getReversed() && dna->getArrayIndex() > colMapIt->first->getEndPosition() - 2) ||
+ (!dna->getReversed() && dna->getArrayIndex() < colMapIt->first->getStartPosition() + 2)) {
+ is4dSite = false;
+ break;
+ }
+ dna->toLeft();
+ c2 = dna->getChar();
+ dna->toLeft();
+ c1 = dna->getChar();
+ if (!isFourfoldDegenerate(c1, c2)) {
+ is4dSite = false;
+ break;
+ }
+ }
+ }
+ if (is4dSite)
+ {
+ hal_index_t pos = colIt->getReferenceSequencePosition();
+ _outBedLines.push_back(_bedLine);
+ _outBedLines.back()._start = pos;
+ _outBedLines.back()._end = pos + 1;
+ }
+ if (_bedLine._strand == '-') {
+ colIt->toRight();
+ colIt->toRight();
+ }
+ if (!colIt->lastColumn()) {
+ colIt->toRight();
+ }
+ }
+}
+
+// NB: Throws out 4d sites that occur in a codon split across exon
+// boundaries. Otherwise the data would need to be single-copy per
+// sequence (not so bad).
+void Extract4d::extractConservedBlocks4d()
{
_outBedLines.push_back(_bedLine);
assert(_outBedLines.size() == 1);
_outBedLines.back()._blocks.clear();
+ bool reversed = _bedLine._strand == '-';
+ hal_index_t frame = 0;
+ deque<BedBlock> buffer;
+ bool splitCodon = false;
+ // Ugly but way more compact. There is probably a much better way of
+ // doing this...
+ for (int64_t i = reversed ? _bedLine._blocks.size() - 1 : 0;
+ reversed ? i >= 0 : i < _bedLine._blocks.size();
+ reversed ? --i : ++i)
+ {
+ if (_bedLine._blocks[i]._length == 0 ||
+ _bedLine._blocks[i]._start + _bedLine._blocks[i]._length >
+ (hal_index_t)_refSequence->getSequenceLength())
+ {
+ cerr << "Line " << _lineNumber << ", block " << i
+ <<": BED coordinates invalid\n";
+ }
+ else
+ {
+ hal_index_t start = _bedLine._blocks[i]._start;
+ hal_index_t end =
+ _bedLine._blocks[i]._start + _bedLine._blocks[i]._length;
+ --end;
+ if (reversed) {
+ swap(start, end);
+ }
+ ColumnIteratorConstPtr colIt = _refSequence->getColumnIterator(NULL, 0, start, NULL_INDEX, false, true, _bedLine._strand == '-');
+ for (hal_index_t n = 0; n < _bedLine._blocks[i]._length; ++n)
+ {
+ if (frame == 2) {
+ bool is4dSite = true;
+ const ColumnIterator::ColumnMap *colMap = colIt->getColumnMap();
+ for (ColumnIterator::ColumnMap::const_iterator colMapIt = colMap->begin();
+ colMapIt != colMap->end(); ++colMapIt) {
+ const ColumnIterator::DNASet *dnaSet = colMapIt->second;
+ for (hal_size_t j = 0; j < dnaSet->size(); j++) {
+ char c1, c2;
+ DNAIteratorConstPtr dna = dnaSet->at(j);
+ if ((dna->getReversed() && dna->getArrayIndex() > colMapIt->first->getEndPosition() - 2) ||
+ (!dna->getReversed() && dna->getArrayIndex() < colMapIt->first->getStartPosition() + 2)) {
+ is4dSite = false;
+ break;
+ }
+ dna->toLeft();
+ c2 = dna->getChar();
+ dna->toLeft();
+ c1 = dna->getChar();
+ if (!isFourfoldDegenerate(c1, c2)) {
+ is4dSite = false;
+ break;
+ }
+ }
+ frame = 0;
+ }
+ if (is4dSite && !splitCodon)
+ {
+ BedBlock block;
+ block._start = colIt->getReferenceSequencePosition() -
+ _refSequence->getStartPosition();
+ block._length = 1;
+ if (reversed) {
+ buffer.push_front(block);
+ } else {
+ buffer.push_back(block);
+ }
+ }
+ } else {
+ frame++;
+ }
+ splitCodon = false;
+ if (n != _bedLine._blocks[i]._length - 1) {
+ if (reversed) {
+ colIt->toSite(colIt->getReferenceSequencePosition() + _refSequence->getStartPosition() - 1,
+ colIt->getReferenceSequencePosition() + _refSequence->getStartPosition(),
+ false);
+ } else {
+ colIt->toRight();
+ }
+ }
+ }
+ if (frame != 0) {
+ splitCodon = true;
+ }
+ }
+ }
+ for (size_t j = 0; j < buffer.size(); ++j)
+ {
+ _outBedLines.back()._blocks.push_back(buffer[j]);
+ }
- for (size_t i = 0; i < _bedLine._blocks.size(); ++i)
+ if (_outBedLines.back()._blocks.size() > 0)
+ {
+ BedLine& bl = _outBedLines.back();
+ hal_index_t startDelta = _outBedLines.back()._blocks[0]._start;
+ if (startDelta > 0)
+ {
+ vector<BedBlock>::iterator j;
+ vector<BedBlock>::iterator k;
+ for (j = bl._blocks.begin(); j != bl._blocks.end(); ++j)
+ {
+ j->_start -= startDelta;
+ k = j;
+ ++k;
+ assert(k == bl._blocks.end() ||
+ k->_start >= (j->_start + j->_length));
+ }
+ bl._start += startDelta;
+ }
+ assert(bl._blocks[0]._start == 0);
+
+ hal_index_t endDelta = bl._end - (bl._blocks.back()._start +
+ bl._blocks.back()._length +
+ bl._start);
+ assert(endDelta >= 0);
+ bl._end -= endDelta;
+ assert(bl._blocks.back()._start +
+ bl._blocks.back()._length + bl._start == bl._end);
+
+ assert(_outBedLines.size() == 1);
+ }
+}
+
+// again, keeping this around because of the single-genome alignment
+// issue.. otherwise conserving among just 1 genome would be
+// equivalent to this
+void Extract4d::extractBlocks4d()
+{
+ _outBedLines.push_back(_bedLine);
+ assert(_outBedLines.size() == 1);
+ _outBedLines.back()._blocks.clear();
+ hal_index_t frame = 0;
+ bool reversed = _bedLine._strand == '-';
+ char currCodon[2] = { '\0', '\0' };
+ deque<BedBlock> buffer;
+ // Ugly but way more compact. There is probably a much better way of
+ // doing this...
+ for (int64_t i = reversed ? _bedLine._blocks.size() - 1 : 0;
+ reversed ? i >= 0 : i < _bedLine._blocks.size();
+ reversed ? --i : ++i)
{
if (_bedLine._blocks[i]._length == 0 ||
- _bedLine._blocks[i]._start + _bedLine._blocks[i]._length >=
+ _bedLine._blocks[i]._start + _bedLine._blocks[i]._length >
(hal_index_t)_refSequence->getSequenceLength())
{
cerr << "Line " << _lineNumber << ", block " << i
@@ -132,12 +340,9 @@ void Extract4d::extractBlocks4d()
}
else
{
- char c1;
- char c2;
hal_index_t start = _bedLine._blocks[i]._start;
hal_index_t end =
_bedLine._blocks[i]._start + _bedLine._blocks[i]._length;
- hal_size_t N = _bedLine._blocks[i]._length / 3;
--end;
if (_bedLine._strand == '-')
{
@@ -149,37 +354,38 @@ void Extract4d::extractBlocks4d()
dna->toReverse();
}
- deque<BedBlock> buffer;
- for (hal_size_t i = 0; i < N; ++i)
+ for (hal_index_t n = 0; n < _bedLine._blocks[i]._length; ++n)
{
- c1 = dna->getChar();
- dna->toRight();
- c2 = dna->getChar();
- dna->toRight();
- if (isFourfoldDegenerate(c1, c2) == true)
- {
- BedBlock block;
- block._start = dna->getArrayIndex() -
- _refSequence->getStartPosition();
- block._length = 1;
- if (_bedLine._strand == '-')
+ if (frame == 2) {
+ if (isFourfoldDegenerate(currCodon[0], currCodon[1]) == true)
{
- buffer.push_front(block);
- }
- else
- {
- buffer.push_back(block);
+ BedBlock block;
+ block._start = dna->getArrayIndex() -
+ _refSequence->getStartPosition();
+ block._length = 1;
+ if (_bedLine._strand == '-')
+ {
+ buffer.push_front(block);
+ }
+ else
+ {
+ buffer.push_back(block);
+ }
}
+ frame = 0;
+ } else {
+ currCodon[frame] = dna->getChar();
+ frame++;
}
dna->toRight();
}
- for (size_t j = 0; j < buffer.size(); ++j)
- {
- _outBedLines.back()._blocks.push_back(buffer[j]);
- }
}
}
+ for (size_t j = 0; j < buffer.size(); ++j)
+ {
+ _outBedLines.back()._blocks.push_back(buffer[j]);
+ }
if (_outBedLines.back()._blocks.size() > 0)
{
BedLine& bl = _outBedLines.back();
View
8 extract/impl/hal4dExtractMain.cpp
@@ -32,7 +32,9 @@ static CLParserPtr initParser()
optionsParser->addOptionFlag("append",
"append to instead of overwrite output file.",
false);
-
+ optionsParser->addOptionFlag("conserved",
+ "ensure 4d sites are 4d sites in all leaf genomes",
+ false);
return optionsParser;
}
@@ -46,6 +48,7 @@ int main(int argc, char** argv)
string outBedPath;
int bedVersion;
bool append;
+ bool conserved;
try
{
optionsParser->parseOptions(argc, argv);
@@ -55,6 +58,7 @@ int main(int argc, char** argv)
outBedPath = optionsParser->getArgument<string>("outBed");
bedVersion = optionsParser->getOption<int>("bedVersion");
append = optionsParser->getFlag("append");
+ conserved = optionsParser->getFlag("conserved");
}
catch(exception& e)
{
@@ -118,7 +122,7 @@ int main(int argc, char** argv)
}
Extract4d extractor;
- extractor.run(genome, inBedStream, outBedStream, bedVersion);
+ extractor.run(genome, inBedStream, outBedStream, bedVersion, conserved);
}
catch(hal_exception& e)
{
View
5 extract/inc/hal4dExtract.h
@@ -27,7 +27,7 @@ class Extract4d : public BedScanner
void run(const Genome* refGenome,
std::istream* inBedStream, std::ostream* outBedStream,
- int bedVersion = -1);
+ int bedVersion = -1, bool conserved = false);
static const char CodonPrefixTable[2][8];
@@ -36,7 +36,9 @@ class Extract4d : public BedScanner
virtual void visitLine();
void extractBed4d();
+ void extractConservedBed4d();
void extractBlocks4d();
+ void extractConservedBlocks4d();
void write();
@@ -47,6 +49,7 @@ class Extract4d : public BedScanner
const Sequence* _refSequence;
std::deque<BedLine> _outBedLines;
int _bedVersion;
+ bool _conserved;
};
}
View
258 extract/tests/hal4dExtractTest.cpp
@@ -0,0 +1,258 @@
+#include "hal.h"
+#include "hal4dExtract.h"
+#include "hal4dExtractTest.h"
+
+using namespace std;
+using namespace hal;
+
+void ConservedBed4dExtractTest::createCallBack(AlignmentPtr alignment)
+{
+ Genome *root = alignment->addRootGenome("root");
+ Genome *leaf1 = alignment->addLeafGenome("leaf1", "root", 1);
+ Genome *leaf2 = alignment->addLeafGenome("leaf2", "root", 1);
+ vector<Sequence::Info> seqVec(1);
+ seqVec[0] = Sequence::Info("rootSequence", 192, 0, 1);
+ root->setDimensions(seqVec);
+ seqVec[0] = Sequence::Info("leaf1Sequence", 192, 1, 0);
+ leaf1->setDimensions(seqVec);
+ seqVec[0] = Sequence::Info("leaf2Sequence", 192, 1, 0);
+ leaf2->setDimensions(seqVec);
+ // all possible codons, in a single frame -- we will test only a few but make
+ // sure that the correct number of sites is output as well
+ // changed final 4d site, which should change nothing since ancestors are
+ // ignored
+ root->setString("aaaaacaagaatacaaccacgactagaagcaggagtataatcatgattcaacaccagcatccacccccgcctcgacgccggcgtctactcctgcttgaagacgaggatgcagccgcggctggaggcgggggtgtagtcgtggtttaatactagtattcatcctcgttttgatgctggtgtttattcttgttt");
+ BottomSegmentIteratorPtr botIt = root->getBottomSegmentIterator();
+ botIt->setCoordinates(0, 192);
+ botIt->setChildIndex(0, 0);
+ botIt->setChildIndex(1, 0);
+ botIt->setChildReversed(0, false);
+ botIt->setChildReversed(1, false);
+ botIt->setTopParseIndex(NULL_INDEX);
+
+ // added unconserved 4d site at 2, disabled 4d site at 14 and 18/20
+ leaf1->setString("acaaacaagaataaaaccatgactagaagcaggagtataatcatgattcaacaccagcatccacccccgcctcgacgccggcgtctactcctgcttgaagacgaggatgcagccgcggctggaggcgggggtgtagtcgtggtttaatactagtattcatcctcgtcttgatgctggtgtttattcttgttt");
+ TopSegmentIteratorPtr topIt = leaf1->getTopSegmentIterator();
+ topIt->setCoordinates(0, 192);
+ topIt->setParentIndex(0);
+ topIt->setParentReversed(false);
+ topIt->setNextParalogyIndex(NULL_INDEX);
+ topIt->setBottomParseIndex(NULL_INDEX);
+
+ // disabled 4d site at 14
+ leaf2->setString("aaaaacaagaataaaaccacgactagaagcaggagtataatcatgattcaacaccagcatccacccccgcctcgacgccggcgtctactcctgcttgaagacgaggatgcagccgcggctggaggcgggggtgtagtcgtggtttaatactagtattcatcctcgtcttgatgctggtgtttattcttgttt");
+ topIt = leaf2->getTopSegmentIterator();
+ topIt->setCoordinates(0, 192);
+ topIt->setParentIndex(0);
+ topIt->setParentReversed(false);
+ topIt->setNextParalogyIndex(NULL_INDEX);
+ topIt->setBottomParseIndex(NULL_INDEX);
+}
+
+void ConservedBed4dExtractTest::checkCallBack(AlignmentConstPtr alignment)
+{
+ const Genome *genome = alignment->openGenome("root");
+ stringstream bedFile("rootSequence\t0\t192\tFORWARD\t0\t+\n"
+ "rootSequence\t0\t192\tREV\t0\t-\n");
+ stringstream outStream;
+ Extract4d extract;
+ extract.run(genome, &bedFile, &outStream, -1, true);
+ vector<string> streamResults = chopString(outStream.str(), "\n");
+ CuAssertTrue(_testCase, find(streamResults.begin(), streamResults.end(), "rootSequence\t14\t15\tFORWARD\t0\t+") == streamResults.end());
+ CuAssertTrue(_testCase, find(streamResults.begin(), streamResults.end(), "rootSequence\t177\t178\tREV\t0\t-") != streamResults.end());
+ CuAssertTrue(_testCase, find(streamResults.begin(), streamResults.end(), "rootSequence\t18\t19\tREV\t0\t-") == streamResults.end());
+ // 14, 18(+ strand) /20 (- strand) disabled, so 3 are missing
+ CuAssertTrue(_testCase, streamResults.size() == 61);
+}
+
+void Bed4dExtractTest::createCallBack(AlignmentPtr alignment)
+{
+ Genome *genome = alignment->addRootGenome("root");
+ vector<Sequence::Info> seqVec(1);
+ seqVec[0] = Sequence::Info("rootSequence", 192, 0, 0);
+ genome->setDimensions(seqVec);
+ // all possible codons, in a single frame -- we will test only a few but make
+ // sure that the correct number of sites is output as well
+ genome->setString("aaaaacaagaatacaaccacgactagaagcaggagtataatcatgattcaacaccagcatccacccccgcctcgacgccggcgtctactcctgcttgaagacgaggatgcagccgcggctggaggcgggggtgtagtcgtggtttaatactagtattcatcctcgtcttgatgctggtgtttattcttgttt");
+}
+
+void Bed4dExtractTest::checkCallBack(AlignmentConstPtr alignment)
+{
+ const Genome *genome = alignment->openGenome("root");
+ stringstream bedFile("rootSequence\t0\t192\tFORWARD\t0\t+\n"
+ "rootSequence\t0\t192\tREV\t0\t-\n");
+ stringstream outStream;
+ Extract4d extract;
+ extract.run(genome, &bedFile, &outStream, -1);
+ vector<string> streamResults = chopString(outStream.str(), "\n");
+ cout << outStream.str() << endl;
+ CuAssertTrue(_testCase, find(streamResults.begin(), streamResults.end(), "rootSequence\t14\t15\tFORWARD\t0\t+") != streamResults.end());
+ CuAssertTrue(_testCase, find(streamResults.begin(), streamResults.end(), "rootSequence\t177\t178\tREV\t0\t-") != streamResults.end());
+ CuAssertTrue(_testCase, streamResults.size() == 64);
+}
+
+void Block4dExtractTest::createCallBack(AlignmentPtr alignment)
+{
+ Genome *genome = alignment->addRootGenome("root");
+ vector<Sequence::Info> seqVec(1);
+ seqVec[0] = Sequence::Info("rootSequence", 192, 0, 0);
+ genome->setDimensions(seqVec);
+ // all possible codons, in a single frame -- we will test only a few but make
+ // sure that the correct number of sites is output as well
+ genome->setString("aaaaacaagaatacaaccacgactagaagcaggagtataatcatgattcaacaccagcatccacccccgcctcgacgccggcgtctactcctgcttgaagacgaggatgcagccgcggctggaggcgggggtgtagtcgtggtttaatactagtattcatcctcgtcttgatgctggtgtttattcttgttt");
+}
+
+void Block4dExtractTest::checkCallBack(AlignmentConstPtr alignment)
+{
+ const Genome *genome = alignment->openGenome("root");
+ // test frame shift
+ stringstream bedFile("rootSequence\t0\t192\tFORWARD\t0\t+\t0\t192\t0\t3\t17,6,7\t0,30,60\n"
+ "rootSequence\t0\t192\tREV\t0\t-\t0\t192\t0\t2\t13,17\t0,175");
+ stringstream outStream;
+ Extract4d extract;
+ extract.run(genome, &bedFile, &outStream, -1);
+ vector<string> streamResults = chopString(outStream.str(), "\n");
+ cout << outStream.str() << endl;
+ CuAssertTrue(_testCase, find(streamResults.begin(), streamResults.end(), "rootSequence\t14\t67\tFORWARD\t0\t+\t0\t192\t0,0,0\t5\t1,1,1,1,1\t0,16,19,46,52") != streamResults.end());
+ CuAssertTrue(_testCase, find(streamResults.begin(), streamResults.end(), "rootSequence\t3\t178\tREV\t0\t-\t0\t192\t0,0,0\t4\t1,1,1,1\t0,3,9,174") != streamResults.end());
+ CuAssertTrue(_testCase, streamResults.size() == 2);
+}
+
+void ConservedBlock4dExtractTest::createCallBack(AlignmentPtr alignment)
+{
+ Genome *root = alignment->addRootGenome("root");
+ Genome *leaf1 = alignment->addLeafGenome("leaf1", "root", 1);
+ Genome *leaf2 = alignment->addLeafGenome("leaf2", "root", 1);
+ vector<Sequence::Info> seqVec(1);
+ seqVec[0] = Sequence::Info("rootSequence", 192, 0, 1);
+ root->setDimensions(seqVec);
+ seqVec[0] = Sequence::Info("leaf1Sequence", 192, 1, 0);
+ leaf1->setDimensions(seqVec);
+ seqVec[0] = Sequence::Info("leaf2Sequence", 192, 1, 0);
+ leaf2->setDimensions(seqVec);
+ // all possible codons, in a single frame -- we will test only a few but make
+ // sure that the correct number of sites is output as well
+ // changed final 4d site, which should change nothing since ancestors are
+ // ignored
+ root->setString("aaaaacaagaatacaaccacgactagaagcaggagtataatcatgattcaacaccagcatccacccccgcctcgacgccggcgtctactcctgcttgaagacgaggatgcagccgcggctggaggcgggggtgtagtcgtggtttaatactagtattcatcctcgttttgatgctggtgtttattcttgttt");
+ BottomSegmentIteratorPtr botIt = root->getBottomSegmentIterator();
+ botIt->setCoordinates(0, 192);
+ botIt->setChildIndex(0, 0);
+ botIt->setChildIndex(1, 0);
+ botIt->setChildReversed(0, false);
+ botIt->setChildReversed(1, false);
+ botIt->setTopParseIndex(NULL_INDEX);
+
+ // added unconserved 4d site at 2, disabled 4d site at 14 and 18/20
+ leaf1->setString("acaaacaagaataaaaccatgactagaagcaggagtataatcatgattcaacaccagcatccacccccgcctcgacgccggcgtctactcctgcttgaagacgaggatgcagccgcggctggaggcgggggtgtagtcgtggtttaatactagtattcatcctcgtcttgatgctggtgtttattcttgttt");
+ TopSegmentIteratorPtr topIt = leaf1->getTopSegmentIterator();
+ topIt->setCoordinates(0, 192);
+ topIt->setParentIndex(0);
+ topIt->setParentReversed(false);
+ topIt->setNextParalogyIndex(NULL_INDEX);
+ topIt->setBottomParseIndex(NULL_INDEX);
+
+ // disabled 4d site at 14
+ leaf2->setString("aaaaacaagaataaaaccacgactagaagcaggagtataatcatgattcaacaccagcatccacccccgcctcgacgccggcgtctactcctgcttgaagacgaggatgcagccgcggctggaggcgggggtgtagtcgtggtttaatactagtattcatcctcgtcttgatgctggtgtttattcttgttt");
+ topIt = leaf2->getTopSegmentIterator();
+ topIt->setCoordinates(0, 192);
+ topIt->setParentIndex(0);
+ topIt->setParentReversed(false);
+ topIt->setNextParalogyIndex(NULL_INDEX);
+ topIt->setBottomParseIndex(NULL_INDEX);
+}
+
+void ConservedBlock4dExtractTest::checkCallBack(AlignmentConstPtr alignment)
+{
+ const Genome *genome = alignment->openGenome("root");
+ // test frame shift
+ stringstream bedFile("rootSequence\t0\t192\tFORWARD\t0\t+\t0\t192\t0\t3\t17,6,7\t0,30,60\n"
+ "rootSequence\t0\t192\tREV\t0\t-\t0\t192\t0\t2\t13,17\t0,175");
+ stringstream outStream;
+ Extract4d extract;
+ extract.run(genome, &bedFile, &outStream, -1, true);
+ vector<string> streamResults = chopString(outStream.str(), "\n");
+ cout << outStream.str();
+ CuAssertTrue(_testCase, find(streamResults.begin(), streamResults.end(), "rootSequence\t33\t67\tFORWARD\t0\t+\t0\t192\t0,0,0\t2\t1,1\t0,33") != streamResults.end());
+ CuAssertTrue(_testCase, find(streamResults.begin(), streamResults.end(), "rootSequence\t3\t178\tREV\t0\t-\t0\t192\t0,0,0\t3\t1,1,1\t0,3,174") != streamResults.end());
+ CuAssertTrue(_testCase, streamResults.size() == 2);
+}
+
+void halBlock4dExtractTest(CuTest *testCase)
+{
+// try
+// {
+ Block4dExtractTest tester;
+ tester.check(testCase);
+/* }
+ catch (...)
+ {
+ CuAssertTrue(testCase, false);
+ }*/
+}
+
+void halConservedBlock4dExtractTest(CuTest *testCase)
+{
+// try
+// {
+ ConservedBlock4dExtractTest tester;
+ tester.check(testCase);
+/* }
+ catch (...)
+ {
+ CuAssertTrue(testCase, false);
+ }*/
+}
+
+void halBed4dExtractTest(CuTest *testCase)
+{
+ try
+ {
+ Bed4dExtractTest tester;
+ tester.check(testCase);
+ }
+ catch (...)
+ {
+ CuAssertTrue(testCase, false);
+ }
+}
+
+void halConservedBed4dExtractTest(CuTest *testCase)
+{
+ try
+ {
+ ConservedBed4dExtractTest tester;
+ tester.check(testCase);
+ }
+ catch (...)
+ {
+ CuAssertTrue(testCase, false);
+ }
+}
+
+CuSuite* hal4dExtractTestSuite(void)
+{
+ CuSuite* suite = CuSuiteNew();
+ SUITE_ADD_TEST(suite, halBed4dExtractTest);
+ SUITE_ADD_TEST(suite, halBlock4dExtractTest);
+ SUITE_ADD_TEST(suite, halConservedBed4dExtractTest);
+ SUITE_ADD_TEST(suite, halConservedBlock4dExtractTest);
+ return suite;
+}
+
+int hal4dExtractRunAllTests(void)
+{
+ CuString *output = CuStringNew();
+ CuSuite* suite = CuSuiteNew();
+ CuSuiteAddSuite(suite, hal4dExtractTestSuite());
+ CuSuiteRun(suite);
+ CuSuiteSummary(suite, output);
+ CuSuiteDetails(suite, output);
+ printf("%s\n", output->buffer);
+ return suite->failCount > 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return hal4dExtractRunAllTests();
+}
View
32 extract/tests/hal4dExtractTest.h
@@ -0,0 +1,32 @@
+#ifndef _HAL4DEXTRACTTEST_H
+#define _HAL4DEXTRACTTEST_H
+#include "halAlignmentTest.h"
+
+extern "C" {
+#include "CuTest.h"
+}
+
+struct Bed4dExtractTest : public AlignmentTest
+{
+ void createCallBack(hal::AlignmentPtr alignment);
+ void checkCallBack(hal::AlignmentConstPtr alignment);
+};
+
+struct ConservedBed4dExtractTest : public AlignmentTest
+{
+ void createCallBack(hal::AlignmentPtr alignment);
+ void checkCallBack(hal::AlignmentConstPtr alignment);
+};
+
+struct Block4dExtractTest : public AlignmentTest
+{
+ void createCallBack(hal::AlignmentPtr alignment);
+ void checkCallBack(hal::AlignmentConstPtr alignment);
+};
+
+struct ConservedBlock4dExtractTest : public AlignmentTest
+{
+ void createCallBack(hal::AlignmentPtr alignment);
+ void checkCallBack(hal::AlignmentConstPtr alignment);
+};
+#endif
Please sign in to comment.
Something went wrong with that request. Please try again.