Skip to content

Commit

Permalink
Add code and tests for get_alignment_break_pairs()
Browse files Browse the repository at this point in the history
This finds pairs of breaks in the alignment that are potentially
suitable for doing extra alignment gluing.
  • Loading branch information
tonyelewis committed Aug 11, 2015
1 parent 18144d0 commit 913ab43
Show file tree
Hide file tree
Showing 6 changed files with 284 additions and 20 deletions.
16 changes: 8 additions & 8 deletions source/CMakeLists.suffix.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,17 +135,17 @@ if ( DEFINED ENV{BUILD_EXTRA_CATH_TOOLS} )
develtest
executables/develtest/develtest.cpp

alignment/alignment_test.cpp
alignment/io/align_scaffold.cpp
alignment/pair_alignment.cpp
test/alignment_fixture.cpp
${DEVELTEST_ALIGNMENT_SOURCES}

# alignment/tools/alignment_breaks_test.cpp
# alignment/tools/alignment_breaks.cpp
# alignment/alignment_test.cpp
# alignment/io/align_scaffold.cpp
# alignment/pair_alignment.cpp
# test/alignment_fixture.cpp
# ${DEVELTEST_ALIGNMENT_SOURCES}

alignment/tools/alignment_breaks_test.cpp
alignment/tools/alignment_breaks.cpp
alignment/io/align_scaffold.cpp
${DEVELTEST_ALIGNMENT_SOURCES}

# alignment/io/align_scaffold_test.cpp
# alignment/io/align_scaffold.cpp
# test/alignment_fixture.cpp
Expand Down
16 changes: 8 additions & 8 deletions source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1720,17 +1720,17 @@ if ( DEFINED ENV{BUILD_EXTRA_CATH_TOOLS} )
develtest
executables/develtest/develtest.cpp

alignment/alignment_test.cpp
alignment/io/align_scaffold.cpp
alignment/pair_alignment.cpp
test/alignment_fixture.cpp
${DEVELTEST_ALIGNMENT_SOURCES}

# alignment/tools/alignment_breaks_test.cpp
# alignment/tools/alignment_breaks.cpp
# alignment/alignment_test.cpp
# alignment/io/align_scaffold.cpp
# alignment/pair_alignment.cpp
# test/alignment_fixture.cpp
# ${DEVELTEST_ALIGNMENT_SOURCES}

alignment/tools/alignment_breaks_test.cpp
alignment/tools/alignment_breaks.cpp
alignment/io/align_scaffold.cpp
${DEVELTEST_ALIGNMENT_SOURCES}

# alignment/io/align_scaffold_test.cpp
# alignment/io/align_scaffold.cpp
# test/alignment_fixture.cpp
Expand Down
76 changes: 75 additions & 1 deletion source/alignment/tools/alignment_breaks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,23 @@
/// You should have received a copy of the GNU General Public License
/// along with this program. If not, see <http://www.gnu.org/licenses/>.


#include "alignment_breaks.h"

#include <boost/algorithm/string/join.hpp>
#include <boost/range/adaptor/filtered.hpp>

#include <boost/range/irange.hpp>

#include "alignment/alignment.h"
#include "common/algorithm/copy_build.h"
#include "common/algorithm/set_union_build.h"
#include "common/algorithm/sets_are_disjoint.h"
#include "common/boost_addenda/range/adaptor/adjacented.h"
#include "common/size_t_literal.h"

using namespace cath;
using namespace cath::align;
using namespace cath::align::detail;
using namespace cath::common;

using boost::adaptors::filtered;
Expand Down Expand Up @@ -62,3 +64,75 @@ size_vec cath::align::get_alignment_breaks(const alignment &arg_alignment ///< T
)
);
}

/// \brief TODOCUMENT
break_pair_validity_and_future cath::align::detail::check_pair(const alignment &arg_alignment, ///< TODOCUMENT
const size_t &arg_break_one, ///< TODOCUMENT
const size_t &arg_break_two ///< TODOCUMENT
) {
// LM LR MR
// 0 0 0 ok; can extend either way
// 0 0 1 no; can extend right
/// 0 1 0 no; cannot extend
/// 0 1 1 no; cannot extend
// 1 0 0 no; can extend left
// 1 0 1 no; cannot extend
/// 1 1 0 no; cannot extend
/// 1 1 1 no; cannot extend

const size_vec lefts = entries_present_at_index( arg_alignment, arg_break_one - 1 );
const size_vec rights = entries_present_at_index( arg_alignment, arg_break_two );
if ( ! sets_are_disjoint( lefts, rights ) ) {
return { break_pair_validity::BAD, break_pair_future::NEVER_AGAIN };
}

const bool left_and_right_are_all = ( lefts.size() + rights.size() == arg_alignment.num_entries() );
const bool middle_is_empty = ( arg_break_one == arg_break_two );
if ( middle_is_empty ) {
const auto the_future = ( left_and_right_are_all ? break_pair_future::NEVER_AGAIN : break_pair_future::MAYBE_LATER );
return { break_pair_validity::GOOD, the_future };
}
else if ( left_and_right_are_all ) {
return { break_pair_validity::BAD, break_pair_future::NEVER_AGAIN };
}

const size_vec middles = entries_present_in_index_range( arg_alignment, arg_break_one, arg_break_two );
assert( ! middles.empty() );
if ( ! sets_are_disjoint( lefts, middles ) ) {
return { break_pair_validity::BAD, break_pair_future::NEVER_AGAIN };
}
const auto lefts_and_rights = set_union_build<size_vec>( lefts, rights );
const auto lefts_middles_and_rights = set_union_build<size_vec>( middles, lefts_and_rights );
const bool lmr_are_all = lefts_middles_and_rights.size() == arg_alignment.num_entries();
const bool middle_conflicts_right = ! sets_are_disjoint( middles, rights );
return {
( middle_conflicts_right ? break_pair_validity::BAD : break_pair_validity::GOOD ),
( lmr_are_all ? break_pair_future::NEVER_AGAIN : break_pair_future::MAYBE_LATER )
};
}

/// \brief TODOCUMENT
size_size_pair_vec cath::align::get_alignment_break_pairs(const alignment &arg_alignment ///< TODOCUMENT
) {
const auto alignment_breaks = get_alignment_breaks( arg_alignment );
const auto num_alignment_breaks = alignment_breaks.size();

size_size_pair_vec break_pairs;
break_pairs.reserve( num_alignment_breaks );

for (const size_t &idx_one : irange( 0_z, num_alignment_breaks ) ) {
for (const size_t &idx_two : irange( idx_one, num_alignment_breaks ) ) {
const size_t &break_one = alignment_breaks[ idx_one ];
const size_t &break_two = alignment_breaks[ idx_two ];

const auto pair_result = check_pair( arg_alignment, break_one, break_two );
if ( pair_result.first == break_pair_validity::GOOD ) {
break_pairs.emplace_back( break_one, break_two );
}
if ( pair_result.second == break_pair_future::NEVER_AGAIN ) {
break;
}
}
}
return break_pairs;
}
24 changes: 21 additions & 3 deletions source/alignment/tools/alignment_breaks.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,35 @@
#ifndef ALIGNMENT_BREAKS_H_INCLUDED
#define ALIGNMENT_BREAKS_H_INCLUDED

//#include "alignment/align_type_aliases.h"
#include "common/type_aliases.h"

//#include <iosfwd>

namespace cath { namespace align { class alignment; } }

namespace cath {
namespace align {
namespace detail {
/// \brief TODOCUMENT
enum class break_pair_validity {
GOOD,
BAD
};

/// \brief TODOCUMENT
enum class break_pair_future {
NEVER_AGAIN,
MAYBE_LATER
};

/// \brief TODOCUMENT
using break_pair_validity_and_future = std::pair<break_pair_validity, break_pair_future>;

break_pair_validity_and_future check_pair(const alignment &,
const size_t &,
const size_t &);
}

size_vec get_alignment_breaks(const alignment &);
size_size_pair_vec get_alignment_break_pairs(const alignment &);

}
}
Expand Down

0 comments on commit 913ab43

Please sign in to comment.