Skip to content

Commit

Permalink
Merge branch 'release-v0.0.5'
Browse files Browse the repository at this point in the history
  • Loading branch information
cartoonist committed Jan 23, 2019
2 parents 6f6a736 + d9f85d8 commit 35e5032
Show file tree
Hide file tree
Showing 5 changed files with 265 additions and 94 deletions.
52 changes: 32 additions & 20 deletions src/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ get_option_values( Options& options, seqan::ArgumentParser& parser );

void
locate_seeds( std::string& seq_name, std::string& gcsa_name, unsigned int seed_len,
bool nonoverlapping, std::string& output_name );
unsigned int distance, std::string& output_name );

void
signal_handler( int signal );
Expand All @@ -67,24 +67,24 @@ main( int argc, char* argv[] )
std::signal( SIGUSR1, signal_handler );

locate_seeds( options.seq_filename, options.gcsa_filename, options.seed_len,
options.nonoverlapping, options.output_filename );
options.distance, options.output_filename );

return EXIT_SUCCESS;
}


void
signal_handler( int signal )
signal_handler( int )
{
std::cout << "Located " << ::done_idx << " out of " << ::total_no << " in "
<< Timer::get_lap( "locate" ).count() << " us: "
<< Timer<>::get_lap_str( "locate" ) << ": "
<< ::done_idx * 100 / total_no << "% done." << std::endl;
}


void
locate_seeds( std::string& seq_name, std::string& gcsa_name, unsigned int seed_len,
bool nonoverlapping, std::string& output_name )
unsigned int distance, std::string& output_name )
{
std::ifstream seq_file( seq_name, std::ifstream::in | std::ifstream::binary );
if ( !seq_file ) {
Expand All @@ -103,38 +103,47 @@ locate_seeds( std::string& seq_name, std::string& gcsa_name, unsigned int seed_l
index.load( gcsa_file );
std::cout << "Loading sequences..." << std::endl;
{
auto timer = Timer( "sequences" );
auto timer = Timer<>( "sequences" );
std::string line;
while ( std::getline( seq_file, line ) ) {
sequences.push_back( line );
}
}
std::cout << "Loaded " << sequences.size() << " sequences in "
<< Timer::get_duration( "sequences" ).count() << " us." << std::endl;
<< Timer<>::get_duration_str( "sequences" ) << "." << std::endl;
std::cout << "Generating patterns..." << std::endl;
{
auto timer = Timer( "patterns" );
if ( nonoverlapping ) {
seeding( patterns, sequences, seed_len, GreedyNonOverlapping() );
}
else {
seeding( patterns, sequences, seed_len, GreedyOverlapping() );
}
auto timer = Timer<>( "patterns" );
seeding( patterns, sequences, seed_len, distance );
}
::total_no = patterns.size();
std::cout << "Generated " << patterns.size() << " patterns in "
<< Timer::get_duration( "patterns" ).count() << " us." << std::endl;
<< Timer<>::get_duration_str( "patterns" ) << "." << std::endl;
std::cout << "Locating patterns..." << std::endl;
std::vector< gcsa::range_type > ranges;
gcsa::size_type total = 0;
{
auto timer = Timer( "locate" );
auto timer = Timer<>( "find" );
for ( const auto& p : patterns ) {
gcsa::range_type range = index.find( p );
if( !gcsa::Range::empty( range ) ) {
ranges.push_back( range );
total += index.count( range );
}
}
}
std::cout << "Found " << ranges.size() << " patterns matching " << total << " paths in "
<< Timer<>::get_duration_str( "find" ) << "." << std::endl;
total = 0;
{
auto timer = Timer<>( "locate" );
for ( const auto& range : ranges ) {
index.locate( range, results, true );
::done_idx++;
}
}
std::cout << "Located " << results.size() << " occurrences in "
<< Timer::get_duration( "locate" ).count() << " us." << std::endl;
<< Timer<>::get_duration_str( "locate" ) << "." << std::endl;
std::cout << "Writing occurrences into file..." << std::endl;
std::ofstream output_file( output_name, std::ofstream::out );
for ( auto && r : results ) {
Expand Down Expand Up @@ -188,8 +197,10 @@ setup_argparser( seqan::ArgumentParser& parser )
seqan::ArgParseArgument::INTEGER, "INT" ) );
setRequired( parser, "l" );
// Overlapping seeds?
addOption( parser, seqan::ArgParseOption( "n", "non-overlapping",
"Use non-overlapping seeds." ) );
addOption( parser, seqan::ArgParseOption( "d", "distance",
"Distance between seeds [default: seed length given by \\fB-l\\fP]",
seqan::ArgParseArgument::INTEGER, "INT" ) );
setDefaultValue( parser, "d", 0 ); /* Default value is seed length. */
// Output file.
seqan::ArgParseOption output_arg( "o", "output",
"Write positions where sequences are matched.",
Expand All @@ -206,5 +217,6 @@ get_option_values( Options& options, seqan::ArgumentParser& parser )
getOptionValue( options.gcsa_filename, parser, "gcsa" );
getOptionValue( options.output_filename, parser, "output" );
getOptionValue( options.seed_len, parser, "seed-len" );
options.nonoverlapping = isSet( parser, "non-overlapping" );
getOptionValue( options.distance, parser, "distance" );
if ( options.distance == 0 ) options.distance = options.seed_len;
}
2 changes: 1 addition & 1 deletion src/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ typedef struct {
std::string gcsa_filename;
std::string output_filename;
unsigned int seed_len;
bool nonoverlapping;
unsigned int distance;
} Options;

#endif // OPTIONS_H__
2 changes: 1 addition & 1 deletion src/release.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#define RELEASE_H__

namespace release {
const char* const version = "0.0.4"; /**< @brief Version number. */
const char* const version = "0.0.5"; /**< @brief Version number. */
const char* const name = "gcsa_locate"; /**< @brief Package name */
const char* const short_desc = "GCSA2 seed finder"; /**< @brief Short description. */
/** @brief Long description. */
Expand Down
8 changes: 4 additions & 4 deletions src/seed.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ typedef Tag< GreedyNonOverlapStrategy > GreedyNonOverlapping;
*/
template< typename TText >
inline void
_seeding( std::vector< TText >& seeds,
seeding( std::vector< TText >& seeds,
const std::vector< TText >& string_set,
unsigned int k,
unsigned int step )
Expand All @@ -59,7 +59,7 @@ template< typename TText >
seeds.push_back( string_set[idx].substr( i, k ) );
}
}
} /* ----- end of template function _seeding ----- */
} /* ----- end of template function seeding ----- */

/**
* @brief Seeding a set of sequence by reporting overlapping k-mers.
Expand All @@ -79,7 +79,7 @@ template< typename TText >
GreedyOverlapping )
{
seeds.clear();
_seeding( seeds, string_set, k, 1 );
seeding( seeds, string_set, k, 1 );
} /* ----- end of template function seeding ----- */

/**
Expand All @@ -100,7 +100,7 @@ template< typename TText >
NonOverlapping )
{
seeds.clear();
_seeding( seeds, string_set, k, k );
seeding( seeds, string_set, k, k );
} /* ----- end of function seeding ----- */

/**
Expand Down

0 comments on commit 35e5032

Please sign in to comment.