Skip to content

Commit

Permalink
Remove dups in get_backbone_complete_residue_ids()
Browse files Browse the repository at this point in the history
Making proteins from PDBs involves removing duplicate residues so it
makes sense that get_backbone_complete_residue_ids() should do the same
so that the results tally

Fixes #52
  • Loading branch information
tonyelewis committed Jan 3, 2018
1 parent 0b73277 commit f9b408a
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 6 deletions.
25 changes: 19 additions & 6 deletions source/uni/file/pdb/pdb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@

#include "biocore/residue_id.hpp"
#include "chopping/region/region.hpp"
#include "common/algorithm/contains.hpp"
#include "common/algorithm/copy_build.hpp"
#include "common/algorithm/sort_uniq_copy.hpp"
#include "common/algorithm/transform_build.hpp"
#include "common/boost_addenda/log/log_to_ostream_guard.hpp"
#include "common/boost_addenda/range/adaptor/adjacented.hpp"
Expand All @@ -59,6 +61,7 @@

#include <fstream>
#include <iostream>
#include <set>
#include <sstream>
#include <tuple>

Expand Down Expand Up @@ -434,6 +437,8 @@ coord cath::file::get_residue_ca_coord_of_region_limited_backbone_complete_index

/// \brief Get the list of residue IDs for the backbone-complete residues on the first chain of the specified PDB
///
/// \TODO This should probably also remove residues with duplicate residue IDs
///
/// \relates pdb
residue_id_vec cath::file::get_backbone_complete_residue_ids_of_first_chain(const pdb &arg_pdb, ///< The PDB to query
const bool &arg_complete_backbone_only ///< Whether to restrict to the backbone-complete residues
Expand All @@ -458,15 +463,23 @@ residue_id_vec cath::file::get_backbone_complete_residue_ids_of_first_chain(cons

/// \brief Get the list of residue IDs for the backbone-complete residues on all chains of the specified PDB
///
/// This also removes residues with duplicate residue IDs: only the first backbone-complete
/// residue with a given residue ID contributes an entry, so the result keeps the IDs in
/// order of first appearance.
///
/// \returns The de-duplicated residue IDs (empty if no residue qualifies)
///
/// \relates pdb
residue_id_vec cath::file::get_backbone_complete_residue_ids(const pdb &arg_pdb ///< The PDB to query
                                                             ) {
	residue_id_vec results;
	for (const pdb_residue &the_res : arg_pdb) {
		// Residues without a complete backbone never contribute an ID
		if ( ! is_backbone_complete( the_res ) ) {
			continue;
		}

		// `results` holds exactly the IDs accepted so far, so it doubles as the
		// "already seen" list and no separate container is needed
		const residue_id the_res_id = the_res.get_residue_id();
		if ( ! contains( results, the_res_id ) ) {
			results.push_back( the_res_id );
		}
	}
	return results;
}

/// \brief TODOCUMENT
Expand Down
4 changes: 4 additions & 0 deletions source/uni/file/pdb/pdb_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,8 @@ backbone_complete_indices_vec cath::file::get_backbone_complete_indices(const pd

/// \brief Get the lists of residue IDs for all chains of each of the PDBs in the specified pdb_list
///
/// This also removes residues with duplicate residue IDs
///
/// \relates pdb_list
residue_id_vec_vec cath::file::get_backbone_complete_residue_ids(const pdb_list &arg_pdb_list ///< The PDBs to query
) {
Expand All @@ -261,6 +263,8 @@ residue_id_vec_vec cath::file::get_backbone_complete_residue_ids(const pdb_list

/// \brief Get the lists of residue IDs for the first chains of each of the PDBs in the specified pdb_list
///
/// \TODO This should probably also remove residues with duplicate residue IDs
///
/// \relates pdb_list
residue_id_vec_vec cath::file::get_backbone_complete_residue_ids_of_first_chains(const pdb_list &arg_pdb_list ///< The PDBs to query
) {
Expand Down
39 changes: 39 additions & 0 deletions source/uni/file/pdb/pdb_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,45 @@ END
BOOST_TEST( pdb_file_to_string( read_pdb( pdb_data ) ) == pdb_data );
}

// Check that get_backbone_complete_residue_ids() returns each residue ID only once
// when the PDB data contains two residues sharing the same residue ID
BOOST_AUTO_TEST_CASE(get_backbone_complete_residue_ids__removes_dupl_res_ids) {
// NOTE(review): presumably the guard diverts any logging emitted while parsing into
// test_ss for as long as the_guard is alive - confirm against log_to_ostream_guard
ostringstream test_ss;
const log_to_ostream_guard the_guard{ test_ss };

// From 4tsw (as of ~January 2018)
//
// Ensure that get_backbone_complete_residue_ids() removes duplicate residue_ids
// so that it matches the behaviour of making proteins from PDBs.
// If necessary, it'd be possible to make both try to handle them but
// that will likely require work to fix arising problems and the issue
// of duplicated residues is rare and is (mostly? completely?) restricted
// to superseded PDBs
//
// The data below has four residues on chain B, each with N, CA, C and O atoms:
// GLN 102, ARG 103, GLU 103 and THR 105. ARG and GLU share the residue ID B 103,
// so that ID must appear only once in the result.
const string pdb_data = R"(ATOM 2229 N GLN B 102 31.966 19.126 31.868 1.00 57.91 N
ATOM 2230 CA GLN B 102 32.135 19.286 30.428 1.00 63.27 C
ATOM 2231 C GLN B 102 31.098 20.226 29.827 1.00 65.08 C
ATOM 2232 O GLN B 102 31.295 20.773 28.738 1.00 66.67 O
ATOM 2241 N ARG B 103 30.036 20.481 30.582 1.00 67.41 N
ATOM 2242 CA ARG B 103 28.981 21.363 30.110 1.00 69.06 C
ATOM 2243 C ARG B 103 28.668 22.406 31.174 1.00 68.80 C
ATOM 2244 O ARG B 103 29.483 22.638 32.071 1.00 69.14 O
ATOM 2247 N GLU B 103 27.492 23.017 31.036 1.00 69.00 N
ATOM 2248 CA GLU B 103 26.946 24.055 31.918 1.00 69.65 C
ATOM 2249 C GLU B 103 26.058 24.961 31.066 1.00 71.54 C
ATOM 2250 O GLU B 103 26.334 25.177 29.877 1.00 72.21 O
ATOM 2253 N THR B 105 24.903 25.407 31.312 1.00 71.90 N
ATOM 2254 CA THR B 105 24.124 26.292 30.435 1.00 69.49 C
ATOM 2255 C THR B 105 23.000 27.081 31.135 1.00 67.87 C
ATOM 2256 O THR B 105 22.911 27.101 32.373 1.00 63.94 O
TER 2257 THR B 105
END
)";
// One entry per distinct residue ID, in file order (B 103 listed once despite two residues)
const residue_id_vec expected = { {
make_residue_id( 'B', 102 ),
make_residue_id( 'B', 103 ),
make_residue_id( 'B', 105 ),
} };
BOOST_TEST( get_backbone_complete_residue_ids( read_pdb( pdb_data ) ) == expected );
}

BOOST_AUTO_TEST_CASE(writes_partial_pdb_correctly) {
const auto parsed_pdb = read_pdb_file( global_test_constants::EXAMPLE_A_PDB_FILENAME() );

Expand Down

0 comments on commit f9b408a

Please sign in to comment.