Skip to content

Commit

Permalink
fix up maf export to properly iterate over multiple sequences
Browse files Browse the repository at this point in the history
  • Loading branch information
glennhickey committed Oct 4, 2012
1 parent a01ef15 commit f0f47e6
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 75 deletions.
2 changes: 1 addition & 1 deletion api/hdf5_impl/hdf5Genome.cpp
Expand Up @@ -548,7 +548,7 @@ ColumnIteratorConstPtr HDF5Genome::getColumnIterator(
lastPosition >= (hal_index_t)(getSequenceLength()))
{
stringstream ss;
ss << "HDF5Genome::getColumnIteratorsetString: input indices "
ss << "HDF5Genome::getColumnIterator: input indices "
<< "(" << position << ", " << lastPosition << ") out of bounds";
throw hal_exception(ss.str());
}
Expand Down
2 changes: 1 addition & 1 deletion api/hdf5_impl/hdf5Sequence.cpp
Expand Up @@ -228,7 +228,7 @@ ColumnIteratorConstPtr HDF5Sequence::getColumnIterator(
lastPosition >= (hal_index_t)(getStartPosition() + getSequenceLength()))
{
stringstream ss;
ss << "HDF5Sequence::getColumnIteratorsetString: input indices "
ss << "HDF5Sequence::getColumnIterators: input indices "
<< "(" << position << ", " << lastPosition << ") out of bounds";
throw hal_exception(ss.str());
}
Expand Down
16 changes: 5 additions & 11 deletions maf/impl/hal2maf.cpp
Expand Up @@ -112,11 +112,13 @@ int main(int argc, char** argv)
{
refGenome = alignment->openGenome(alignment->getRootName());
}
const SegmentedSequence* ref = refGenome;

const Sequence* refSequence = NULL;
if (refSequenceName != "\"\"")
{
refSequence = refGenome->getSequence(refSequenceName);
ref = refSequence;
if (refSequence == NULL)
{
throw hal_exception(string("Reference sequence, ") + refSequenceName +
Expand All @@ -134,17 +136,9 @@ int main(int argc, char** argv)
MafExport mafExport;
mafExport.setMaxRefGap(maxRefGap);
mafExport.setNoDupes(noDupes);

if (refSequence == NULL)
{
mafExport.convertGenome(mafStream, alignment, refGenome,
start, length, rootGenome);
}
else
{
mafExport.convertSequence(mafStream, alignment, refSequence,
start, length, rootGenome);
}

mafExport.convertSegmentedSequence(mafStream, alignment, ref,
start, length, rootGenome);

}
catch(hal_exception& e)
Expand Down
65 changes: 16 additions & 49 deletions maf/impl/halMafExport.cpp
Expand Up @@ -41,67 +41,34 @@ void MafExport::writeHeader()
}
}

void MafExport::convertGenome(ostream& mafStream,
AlignmentConstPtr alignment,
const Genome* genome,
hal_index_t startPosition,
hal_size_t length,
const Genome* root)
void MafExport::convertSegmentedSequence(ostream& mafStream,
AlignmentConstPtr alignment,
const SegmentedSequence* seq,
hal_index_t startPosition,
hal_size_t length,
const Genome* root)
{
if (length == 0)
{
length = genome->getSequenceLength() - startPosition;
}
if ((hal_size_t)startPosition + length > genome->getSequenceLength())
assert(seq != NULL);
if (startPosition >= (hal_index_t)seq->getSequenceLength() ||
(hal_size_t)startPosition + length > seq->getSequenceLength())
{
throw hal_exception("Invalid range specified for convertGenome");
}
_mafStream = &mafStream;
_alignment = alignment;
writeHeader();
hal_index_t pos = startPosition;
hal_size_t doneLen = 0;

while (doneLen < length)
{
assert(pos < (hal_index_t)genome->getSequenceLength());
const Sequence* sequence = genome->getSequenceBySite(pos);
assert(sequence != NULL);
hal_index_t seqStart = pos - sequence->getStartPosition();
hal_size_t curLen = min(length - doneLen,
sequence->getSequenceLength() - seqStart);
assert(curLen > 0);
convertSequence(mafStream, alignment, sequence, seqStart, curLen, root);
pos += curLen;
doneLen += curLen;
}
}

void MafExport::convertSequence(ostream& mafStream,
AlignmentConstPtr alignment,
const Sequence* sequence,
hal_index_t startPosition,
hal_size_t length,
const Genome* root)
{
if (length == 0)
{
length = sequence->getSequenceLength();
}
if ((hal_size_t)startPosition + length > sequence->getSequenceLength())
{
throw hal_exception("Invalid range specified for convertSequence");
length = seq->getSequenceLength() - startPosition;
}
hal_index_t lastPosition = startPosition + (hal_index_t)(length - 1);

_mafStream = &mafStream;
_alignment = alignment;
writeHeader();

ColumnIteratorConstPtr colIt = sequence->getColumnIterator(root,
_maxRefGap,
startPosition,
lastPosition,
_noDupes);
ColumnIteratorConstPtr colIt = seq->getColumnIterator(root,
_maxRefGap,
startPosition,
lastPosition,
_noDupes);
_mafBlock.initBlock(colIt);
assert(_mafBlock.canAppendColumn(colIt) == true);

Expand Down
19 changes: 6 additions & 13 deletions maf/inc/halMafExport.h
Expand Up @@ -22,19 +22,12 @@ class MafExport

virtual ~MafExport();

void convertGenome(std::ostream& mafStream,
AlignmentConstPtr alignment,
const Genome* genome,
hal_index_t startPosition = 0,
hal_size_t length = 0,
const Genome* root = NULL);

void convertSequence(std::ostream& mafStream,
AlignmentConstPtr alignment,
const Sequence* sequence,
hal_index_t startPosition = 0,
hal_size_t length = 0,
const Genome* root = NULL);
void convertSegmentedSequence(std::ostream& mafStream,
AlignmentConstPtr alignment,
const SegmentedSequence* seq,
hal_index_t startPosition,
hal_size_t length,
const Genome* root);

void setMaxRefGap(hal_size_t maxRefGap);
void setNoDupes(bool noDupes);
Expand Down

0 comments on commit f0f47e6

Please sign in to comment.