Skip to content

Commit

Permalink
Integrate name_set and name_set_list through code
Browse files Browse the repository at this point in the history
  • Loading branch information
tonyelewis committed Oct 13, 2017
1 parent 8078efc commit 9831df9
Show file tree
Hide file tree
Showing 50 changed files with 2,118 additions and 1,800 deletions.
3,172 changes: 1,586 additions & 1,586 deletions build-test-data/residue_ids/Q9HAU8.multiple_superposed_models.pml

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "common/boost_addenda/range/front.hpp"
#include "common/clone/make_uptr_clone.hpp"
#include "common/file/open_fstream.hpp"
#include "file/name_set/name_set_list.hpp"
#include "file/pdb/pdb.hpp"
#include "file/pdb/pdb_atom.hpp"
#include "file/pdb/pdb_list.hpp"
Expand Down Expand Up @@ -101,7 +102,7 @@ pair<alignment, superpose_orderer> ssap_scores_file_alignment_acquirer::do_get_a

// BOOST_LOG_TRIVIAL( warning )<< "About to attempt to build protein list using data that's been read from ssaps_filename (with " << arg_pdbs.size() << " pdbs and " << names.size() << " names)";

const protein_list proteins_of_pdbs = build_protein_list_of_pdb_list_and_names( arg_pdbs, names );
const protein_list proteins_of_pdbs = build_protein_list_of_pdb_list_and_names( arg_pdbs, build_name_set_list( names ) );
const alignment scored_new_alignment = score_alignment_copy( residue_scorer(), new_alignment, proteins_of_pdbs );


Expand Down
5 changes: 3 additions & 2 deletions source/acquirer/pdbs_acquirer/domain_defn_pdbs_acquirer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "chopping/domain/domain_definition.hpp"
#include "common/clone/make_uptr_clone.hpp"
#include "file/domain_definition_list/domain_definition_list.hpp"
#include "file/name_set/name_set_list.hpp"
#include "file/options/data_dirs_spec.hpp"
#include "file/pdb/pdb.hpp"
#include "file/pdb/pdb_atom.hpp"
Expand All @@ -46,8 +47,8 @@ unique_ptr<pdbs_acquirer> domain_defn_pdbs_acquirer::do_clone() const {
}

/// \brief TODOCUMENT
pdb_list_str_vec_pair domain_defn_pdbs_acquirer::do_get_pdbs_and_names(istream &/*arg_istream*/ ///< TODOCUMENT
) const {
pdb_list_name_set_list_pair domain_defn_pdbs_acquirer::do_get_pdbs_and_names(istream &/*arg_istream*/ ///< TODOCUMENT
) const {
const domain_definition_list the_dom_defns = parse_domain_definition_file( domain_defn_file );
BOOST_LOG_TRIVIAL( warning ) << "Currently using a hard-coded domain PDB directory : /cath/data/current/pdb";
return read_domains_from_pdbs( the_dom_defns, build_data_dirs_spec_of_dir( "/cath/data/current/pdb" ) );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ namespace cath {
boost::filesystem::path domain_defn_file;

std::unique_ptr<pdbs_acquirer> do_clone() const final;
file::pdb_list_str_vec_pair do_get_pdbs_and_names(std::istream &) const final;
file::pdb_list_name_set_list_pair do_get_pdbs_and_names(std::istream &) const final;

public:
explicit domain_defn_pdbs_acquirer(const boost::filesystem::path &);
Expand Down
17 changes: 10 additions & 7 deletions source/acquirer/pdbs_acquirer/file_list_pdbs_acquirer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,39 +21,42 @@
#include "file_list_pdbs_acquirer.hpp"

#include "common/clone/make_uptr_clone.hpp"
#include "file/pdb/pdb_list.hpp"
#include "file/name_set/name_set_list.hpp"
#include "file/pdb/pdb.hpp"
#include "file/pdb/pdb_atom.hpp"
#include "file/pdb/pdb_list.hpp"
#include "file/pdb/pdb_residue.hpp"

using namespace cath::common;
using namespace cath::file;
using namespace cath::opts;
using namespace std;

using boost::filesystem::path;
using std::istream;
using std::make_pair;
using std::unique_ptr;

/// \brief Make a copy of this file_list_pdbs_acquirer via make_uptr_clone()
///
/// \returns The copy, held in a unique_ptr to the pdbs_acquirer base
unique_ptr<pdbs_acquirer> file_list_pdbs_acquirer::do_clone() const {
	return unique_ptr<pdbs_acquirer>{ make_uptr_clone( *this ) };
}

/// \brief Read a PDB from each of the stored files and return the PDBs along with one name per file
///
/// The istream argument is unnamed and unused; the input comes from the `files` member.
///
/// \returns A pair of the PDBs (in the order of `files`) and the corresponding names
pdb_list_str_vec_pair file_list_pdbs_acquirer::do_get_pdbs_and_names(istream &/*arg_istream*/ ///< Unused (the PDBs are read from the stored files)
) const {
pdb_list_name_set_list_pair file_list_pdbs_acquirer::do_get_pdbs_and_names(istream &/*arg_istream*/ ///< Unused (the PDBs are read from the stored files)
) const {
// The PDBs and their names, populated in step with each other (one entry of each per file)
pdb_list pdbs;
str_vec names;
name_set_vec names;

// Read each file as a PDB and record a name for it that's built from the file's path
const size_t num_input_files = files.size();
for (size_t input_file_ctr = 0; input_file_ctr < num_input_files; ++input_file_ctr) {
const path &input_filename = files[ input_file_ctr ];
const pdb my_new_pdb = read_pdb_file( input_filename );
pdbs.push_back( my_new_pdb );
names.push_back( ( input_filename.stem() ).string() );
names.emplace_back( input_filename );
}
return make_pair( pdbs, names );
return make_pair( pdbs, name_set_list{ std::move( names ) } );
}

/// \brief Ctor for file_list_pdbs_acquirer.
Expand Down
2 changes: 1 addition & 1 deletion source/acquirer/pdbs_acquirer/file_list_pdbs_acquirer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ namespace cath {
path_vec files;

std::unique_ptr<pdbs_acquirer> do_clone() const final;
file::pdb_list_str_vec_pair do_get_pdbs_and_names(std::istream &) const final;
file::pdb_list_name_set_list_pair do_get_pdbs_and_names(std::istream &) const final;

public:
explicit file_list_pdbs_acquirer(path_vec);
Expand Down
44 changes: 26 additions & 18 deletions source/acquirer/pdbs_acquirer/istream_pdbs_acquirer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@

#include "istream_pdbs_acquirer.hpp"

#include <boost/lexical_cast.hpp>

#include "common/algorithm/transform_build.hpp"
#include "common/boost_addenda/range/indices.hpp"
#include "common/clone/make_uptr_clone.hpp"
#include "file/pdb/pdb_list.hpp"
#include "file/name_set/name_set_list.hpp"
#include "file/pdb/pdb.hpp"
#include "file/pdb/pdb_atom.hpp"
#include "file/pdb/pdb_list.hpp"
#include "file/pdb/pdb_residue.hpp"

using namespace cath::common;
Expand All @@ -41,22 +42,29 @@ unique_ptr<pdbs_acquirer> istream_pdbs_acquirer::do_clone() const {
}

/// \brief Read PDBs from the specified istream (via read_end_separated_pdb_files()) and
///        return them along with a generated name for each
///
/// Each generated name has the form "PDB_<n>_from_stdin (with <m> atoms)", where <n> is the
/// 1-based position of the PDB in the input and <m> is the result of its get_num_atoms().
///
/// \returns A pair of the PDBs and the corresponding generated names
pdb_list_str_vec_pair istream_pdbs_acquirer::do_get_pdbs_and_names(istream &arg_istream ///< The istream from which to read the PDBs
) const {
// The PDBs and their names (one name per PDB)
pdb_list pdbs;
str_vec names;
pdb_list_name_set_list_pair istream_pdbs_acquirer::do_get_pdbs_and_names(istream &arg_istream ///< The istream from which to read the PDBs
) const {
using std::to_string;

// Read PDBs from arg_istream
pdbs = read_end_separated_pdb_files( arg_istream );
names.assign( pdbs.size(), string() );
for (size_t names_ctr = 0; names_ctr < names.size(); ++names_ctr) {
names[ names_ctr ] = "PDB_"
+ lexical_cast<string>( names_ctr + 1 )
+ "_from_stdin (with "
+ lexical_cast<string>( pdbs[ names_ctr ].get_num_atoms() )
+ " atoms)";
}
return make_pair( pdbs, names );
const pdb_list pdbs = read_end_separated_pdb_files( arg_istream );

// Build one name_set per PDB index; the lambda captures by reference so that it can
// look up each PDB's number of atoms in pdbs
return make_pair(
pdbs,
name_set_list{
transform_build<name_set_vec>(
indices( pdbs.size() ),
[&] (const size_t &x) {
return name_set{
"PDB_"
+ to_string( x + 1 )
+ "_from_stdin (with "
+ to_string( pdbs[ x ].get_num_atoms() )
+ " atoms)"
};
}
)
}
);
}

2 changes: 1 addition & 1 deletion source/acquirer/pdbs_acquirer/istream_pdbs_acquirer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace cath {
// A concrete (final) pdbs_acquirer; it adds no state of its own and only
// overrides the two private virtual hooks below
class istream_pdbs_acquirer final : public pdbs_acquirer {
private:
// Polymorphic copy of this acquirer
std::unique_ptr<pdbs_acquirer> do_clone() const final;
// Get the PDBs, and a name for each of them, using the specified istream
file::pdb_list_str_vec_pair do_get_pdbs_and_names(std::istream &) const final;
file::pdb_list_name_set_list_pair do_get_pdbs_and_names(std::istream &) const final;
};

} // namespace opts
Expand Down
38 changes: 22 additions & 16 deletions source/acquirer/pdbs_acquirer/pdbs_acquirer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ using namespace cath::opts;
using boost::none;
using std::cerr;
using std::istream;
using std::make_pair;
using std::pair;
using std::unique_ptr;
using std::vector;
Expand All @@ -59,13 +60,13 @@ unique_ptr<pdbs_acquirer> pdbs_acquirer::clone() const {
///
/// \TODO Consider taking an ostream_ref_opt argument rather than assuming cerr
/// (fix all errors, *then* provide default of boost::none)
pdb_list_str_vec_pair pdbs_acquirer::get_pdbs_and_names(istream &arg_istream, ///< TODOCUMENT
const bool &arg_remove_partial_residues ///< TODOCUMENT
) const {
pair<pdb_list, str_vec> pdbs_and_names = do_get_pdbs_and_names( arg_istream );
pdb_list_name_set_list_pair pdbs_acquirer::get_pdbs_and_names(istream &arg_istream, ///< TODOCUMENT
const bool &arg_remove_partial_residues ///< TODOCUMENT
) const {
pair<pdb_list, name_set_list> pdbs_and_names = do_get_pdbs_and_names( arg_istream );
// Create a vector of PDBs to be superposed
pdb_list &pdbs = pdbs_and_names.first;
str_vec &names = pdbs_and_names.second;
pdb_list &pdbs = pdbs_and_names.first;
name_set_list &names = pdbs_and_names.second;

// Check the number of source files and then grab them
// if (pdbs.size() < 2) {
Expand All @@ -77,7 +78,13 @@ pdb_list_str_vec_pair pdbs_acquirer::get_pdbs_and_names(istream &arg_istream,

// If the number of names doesn't match the number of PDBs then throw a wobbly
if ( names.size() != pdbs.size() ) {
BOOST_THROW_EXCEPTION(invalid_argument_exception("The number of names doesn't match the number of PDBs"));
BOOST_THROW_EXCEPTION(invalid_argument_exception(
"The number of names ("
+ std::to_string( names.size() )
+ ") doesn't match the number of PDBs ("
+ std::to_string( pdbs.size() )
+ ")"
));
}

return arg_remove_partial_residues ? make_pair( pdb_list_of_backbone_complete_subset_pdbs( pdbs, ref( cerr ) ), names )
Expand Down Expand Up @@ -155,21 +162,20 @@ pair<str_opt_vec, region_vec_opt_vec> strip_domain_vec(domain_vec arg_domains //
///
/// In the future, it may be worth building more interesting types (than str_vec) to record both the provenance (arg_names_from_acq)
/// and user-specified names (arg_ids) of the structure
strucs_context cath::opts::combine_acquired_pdbs_and_names_with_ids_and_domains(pdb_list arg_pdbs, ///< The PDBs obtained from a pdbs_acquirer
str_vec arg_names_from_acq, ///< The names obtained from a pdbs_acquirer
str_vec specified_ids, ///< Alternative IDs
domain_vec arg_domains ///< Regions for the strucs_context
strucs_context cath::opts::combine_acquired_pdbs_and_names_with_ids_and_domains(pdb_list arg_pdbs, ///< The PDBs obtained from a pdbs_acquirer
name_set_list arg_names_from_acq, ///< The names obtained from a pdbs_acquirer
str_vec specified_ids, ///< Alternative IDs
domain_vec arg_domains ///< Regions for the strucs_context
) {
auto stripped_domain_vec = strip_domain_vec( std::move( arg_domains ) );

stripped_domain_vec.second.resize( arg_names_from_acq.size(), none );

return {
std::move( arg_pdbs ),
build_name_set_list(
std::move( arg_names_from_acq ),
std::move( specified_ids ),
std::move( stripped_domain_vec.first )
std::move( arg_pdbs ),
add_specified_ids_copy(
std::move( arg_names_from_acq ),
std::move( specified_ids )
),
std::move( stripped_domain_vec.second )
};
Expand Down
8 changes: 4 additions & 4 deletions source/acquirer/pdbs_acquirer/pdbs_acquirer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ namespace cath {
virtual std::unique_ptr<pdbs_acquirer> do_clone() const = 0;

/// \brief TODOCUMENT
virtual std::pair<file::pdb_list, str_vec> do_get_pdbs_and_names(std::istream &) const = 0;
virtual file::pdb_list_name_set_list_pair do_get_pdbs_and_names(std::istream &) const = 0;

public:
pdbs_acquirer() = default;
Expand All @@ -58,8 +58,8 @@ namespace cath {
pdbs_acquirer & operator=(const pdbs_acquirer &) = default;
pdbs_acquirer & operator=(pdbs_acquirer &&) noexcept = default;

file::pdb_list_str_vec_pair get_pdbs_and_names(std::istream &,
const bool &) const;
file::pdb_list_name_set_list_pair get_pdbs_and_names(std::istream &,
const bool &) const;
};

uptr_vec<pdbs_acquirer> get_pdbs_acquirers(const pdb_input_spec &);
Expand All @@ -68,7 +68,7 @@ namespace cath {
std::unique_ptr<pdbs_acquirer> get_pdbs_acquirer(const pdb_input_spec &);

file::strucs_context combine_acquired_pdbs_and_names_with_ids_and_domains(file::pdb_list,
str_vec,
file::name_set_list,
str_vec,
chop::domain_vec);

Expand Down
33 changes: 22 additions & 11 deletions source/alignment/io/alignment_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include "common/type_aliases.hpp"
#include "exception/invalid_argument_exception.hpp"
#include "exception/runtime_error_exception.hpp"
#include "file/name_set/name_set_list.hpp"
#include "file/pdb/pdb.hpp"
#include "file/pdb/pdb_atom.hpp"
#include "file/pdb/pdb_list.hpp"
Expand Down Expand Up @@ -926,7 +927,7 @@ ostream & cath::align::write_alignment_as_fasta_alignment(ostream &ar
const protein &the_protein = arg_proteins[ entry_ctr ];

// Output the title (ie name of this protein)
arg_os << ">" << the_protein.get_title() << "\n";
arg_os << ">" << get_domain_or_specified_or_name_from_acq( the_protein ) << "\n";

// Loop over the indices of the alignment
for (size_t aln_index = 0; aln_index < length; ++aln_index) {
Expand All @@ -948,10 +949,10 @@ ostream & cath::align::write_alignment_as_fasta_alignment(ostream &ar
/// \brief Output an alignment in FASTA format
///
/// \relates alignment
ostream & cath::align::write_alignment_as_fasta_alignment(ostream &arg_os, ///< TODOCUMENT
const alignment &arg_alignment, ///< TODOCUMENT
const pdb_list &arg_pdbs, ///< TODOCUMENT
const str_vec &arg_names ///< TODOCUMENT
ostream & cath::align::write_alignment_as_fasta_alignment(ostream &arg_os, ///< TODOCUMENT
const alignment &arg_alignment, ///< TODOCUMENT
const pdb_list &arg_pdbs, ///< TODOCUMENT
const name_set_list &arg_names ///< TODOCUMENT
) {
return write_alignment_as_fasta_alignment(
arg_os,
Expand All @@ -963,8 +964,8 @@ ostream & cath::align::write_alignment_as_fasta_alignment(ostream &arg_o
/// \brief Output an alignment in FASTA format
///
/// \relates alignment
string cath::align::alignment_as_fasta_string(const alignment &arg_alignment, ///< TODOCUMENT
const protein_list &arg_proteins ///< TODOCUMENT
string cath::align::alignment_as_fasta_string(const alignment &arg_alignment, ///< The alignment to represent in FASTA format
const protein_list &arg_proteins ///< The proteins corresponding to the entries in the alignment
) {
ostringstream the_out_ss;
write_alignment_as_fasta_alignment( the_out_ss, arg_alignment, arg_proteins);
Expand All @@ -974,11 +975,21 @@ string cath::align::alignment_as_fasta_string(const alignment &arg_alignment,
/// \brief Build a string containing an alignment in FASTA format
///
/// This writes to a local ostringstream with write_alignment_as_fasta_alignment()
/// and returns the resulting string.
///
/// \relates alignment
///
/// \returns The text written by write_alignment_as_fasta_alignment() for the arguments
string cath::align::alignment_as_fasta_string(const alignment &arg_alignment, ///< TODOCUMENT
const pdb_list &arg_pdbs, ///< TODOCUMENT
const str_vec &arg_names ///< TODOCUMENT
string cath::align::alignment_as_fasta_string(const alignment &arg_alignment, ///< The alignment to represent in FASTA format
const pdb_list &arg_pdbs, ///< The PDBs corresponding to the entries in the alignment
const name_set_list &arg_names ///< The names corresponding to the entries in the alignment
) {
// Write into an in-memory stream so the output can be handed back as a string
ostringstream the_out_ss;
write_alignment_as_fasta_alignment( the_out_ss, arg_alignment, arg_pdbs, arg_names);
write_alignment_as_fasta_alignment( the_out_ss, arg_alignment, arg_pdbs, arg_names );
return the_out_ss.str();
}

/// \brief Build a FASTA-format string for an alignment, given the names as plain strings
///
/// The names are converted with build_name_set_list() and the work is then
/// forwarded to the overload that takes a name_set_list.
///
/// \relates alignment
///
/// \returns The string returned by the name_set_list overload
std::string cath::align::alignment_as_fasta_string(const alignment &arg_alignment, ///< The alignment to represent in FASTA format
                                                   const pdb_list  &arg_pdbs,      ///< The PDBs corresponding to the entries in the alignment
                                                   const str_vec   &arg_names      ///< The names corresponding to the entries in the alignment
                                                   ) {
	const auto the_name_sets = build_name_set_list( arg_names );
	return alignment_as_fasta_string( arg_alignment, arg_pdbs, the_name_sets );
}
13 changes: 8 additions & 5 deletions source/alignment/io/alignment_io.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,13 @@
#include <iostream>
#include <string>

namespace cath { namespace file { class pdb; } }
namespace cath { namespace file { class pdb_list; } }
namespace cath { class protein; }
namespace cath { class protein_list; }

namespace cath { namespace file { class name_set_list; } }
namespace cath { namespace file { class pdb; } }
namespace cath { namespace file { class pdb_list; } }

namespace cath {

namespace align {

/// \todo Organise these into three class hierarchies: alignment_format, alignment_reader and alignment_writer
Expand Down Expand Up @@ -136,11 +135,15 @@ namespace cath {
std::ostream & write_alignment_as_fasta_alignment(std::ostream &,
const alignment &,
const file::pdb_list &,
const str_vec &);
const file::name_set_list &);

std::string alignment_as_fasta_string(const alignment &,
const protein_list &);

std::string alignment_as_fasta_string(const alignment &,
const file::pdb_list &,
const file::name_set_list &);

std::string alignment_as_fasta_string(const alignment &,
const file::pdb_list &,
const str_vec &);
Expand Down
Loading

0 comments on commit 9831df9

Please sign in to comment.