Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adde the MMTF parser, test data and test code.
- Loading branch information
Showing
7 changed files
with
3,574 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
from Bio.PDB.StructureBuilder import StructureBuilder | ||
|
||
class StructureDecoder(object): | ||
"""Class to pass the data from mmtf-python into a BioPython data structure.""" | ||
|
||
def __init__(self): | ||
self.this_type = "" | ||
|
||
def init_structure(self, total_num_bonds, total_num_atoms, total_num_groups, total_num_chains, total_num_models, structure_id): | ||
"""Initialise the structure object. | ||
:param total_num_bonds: the number of bonds in the structure | ||
:param total_num_atoms: the number of atoms in the structure | ||
:param total_num_groups: the number of groups in the structure | ||
:param total_num_chains: the number of chains in the structure | ||
:param total_num_models: the number of models in the structure | ||
:param structure_id the: id of the structure (e.g. PDB id) | ||
""" | ||
self.structure_bulder = StructureBuilder() | ||
self.structure_bulder.init_structure(structure_id=structure_id) | ||
self.chain_index_to_type_map = {} | ||
self.chain_index_to_seq_map = {} | ||
self.chain_index_to_description_map = {} | ||
self.chain_counter = 0 | ||
|
||
|
||
def set_atom_info(self, atom_name, serial_number, alternative_location_id, x, y, z, occupancy, | ||
temperature_factor, element, charge): | ||
"""Create an atom object an set the information. | ||
:param atom_name: the atom name, e.g. CA for this atom | ||
:param serial_number: the serial id of the atom (e.g. 1) | ||
:param alternative_location_id: the alternative location id for the atom, if present | ||
:param x: the x coordiante of the atom | ||
:param y: the y coordinate of the atom | ||
:param z: the z coordinate of the atom | ||
:param occupancy: the occupancy of the atom | ||
:param temperature_factor: the temperature factor of the atom | ||
:param element: the element of the atom, e.g. C for carbon. According to IUPAC. Calcium is Ca | ||
:param charge: the formal atomic charge of the atom | ||
""" | ||
# Atom_name is in twice - the full_name is with spaces | ||
self.structure_bulder.init_atom(str(atom_name), [x,y,z], temperature_factor, occupancy, | ||
alternative_location_id, str(atom_name), | ||
serial_number=serial_number, element=str(element).upper()) | ||
|
||
def set_chain_info(self, chain_id, chain_name, num_groups): | ||
"""Set the chain information. | ||
:param chain_id: the asym chain id from mmCIF | ||
:param chain_name: the auth chain id from mmCIF | ||
:param num_groups: the number of groups this chain has | ||
""" | ||
# A Bradley - chose to use chain_name (auth_id) as it complies with current BioPython. Chain_id might be better. | ||
self.structure_bulder.init_chain(chain_id=chain_name) | ||
if self.chain_index_to_type_map[self.chain_counter] == "polymer": | ||
self.this_type = "" | ||
elif self.chain_index_to_type_map[self.chain_counter] == "non-polymer": | ||
self.this_type = "H" | ||
elif self.chain_index_to_type_map[self.chain_counter] == "water": | ||
self.this_type = "W" | ||
self.chain_counter += 1 | ||
|
||
|
||
def set_entity_info(self, chain_indices, sequence, description, entity_type): | ||
"""Set the entity level information for the structure. | ||
:param chain_indices: the indices of the chains for this entity | ||
:param sequence: the one letter code sequence for this entity | ||
:param description: the description for this entity | ||
:param entity_type: the entity type (polymer,non-polymer,water) | ||
""" | ||
for chain_ind in chain_indices: | ||
self.chain_index_to_type_map[chain_ind] = entity_type | ||
self.chain_index_to_seq_map[chain_ind] = sequence | ||
self.chain_index_to_description_map[chain_ind] = description | ||
|
||
def set_group_info(self, group_name, group_number, insertion_code, group_type, atom_count, bond_count, | ||
single_letter_code, sequence_index, secondary_structure_type): | ||
"""Set the information for a group | ||
:param group_name: the name of this group,e.g. LYS | ||
:param group_number: the residue number of this group | ||
:param insertion_code: the insertion code for this group | ||
:param group_type: a string indicating the type of group (as found in the chemcomp dictionary. | ||
Empty string if none available. | ||
:param atom_count: the number of atoms in the group | ||
:param bond_count: the number of unique bonds in the group | ||
:param single_letter_code: the single letter code of the group | ||
:param sequence_index: the index of this group in the sequence defined by the enttiy | ||
:param secondary_structure_type: the type of secondary structure used (types are according to DSSP and | ||
number to type mappings are defined in the specification) | ||
""" | ||
self.structure_bulder.init_seg(' ') | ||
self.structure_bulder.init_residue(group_name, self.this_type, group_number, insertion_code) | ||
|
||
def set_model_info(self, model_id, chain_count): | ||
"""Set the information for a model. | ||
:param model_id: the index for the model | ||
:param chain_count: the number of chains in the model | ||
""" | ||
self.structure_bulder.init_model(model_id) | ||
|
||
def set_xtal_info(self, space_group, unit_cell): | ||
"""Set the crystallographic information for the structure | ||
:param space_group: the space group name, e.g. "P 21 21 21" | ||
:param unit_cell: an array of length 6 with the unit cell parameters in order: a, b, c, alpha, beta, gamma | ||
""" | ||
self.structure_bulder.set_symmetry(space_group, unit_cell) | ||
|
||
def set_header_info(self, r_free, r_work, resolution, title, deposition_date, release_date, experimnetal_methods): | ||
"""Sets the header information. | ||
:param r_free: the measured R-Free for the structure | ||
:param r_work: the measure R-Work for the structure | ||
:param resolution: the resolution of the structure | ||
:param title: the title of the structure | ||
:param deposition_date: the deposition date of the structure | ||
:param release_date: the release date of the structure | ||
:param experimnetal_methods: the list of experimental methods in the structure | ||
""" | ||
pass | ||
|
||
def set_bio_assembly_trans(self, bio_assembly_index, input_chain_indices, input_transform): | ||
"""Set the Bioassembly transformation information. A single bioassembly can have multiple transforms, | ||
:param bio_assembly_index: the integer index of the bioassembly | ||
:param input_chain_indices: the list of integer indices for the chains of this bioassembly | ||
:param input_transformation: the list of doubles for the transform of this bioassmbly transform""" | ||
pass | ||
|
||
def finalize_structure(self): | ||
"""Any functions needed to cleanup the structure.""" | ||
pass | ||
|
||
def set_group_bond(self, atom_index_one, atom_index_two, bond_order): | ||
"""Add bonds within a group. | ||
:param atom_index_one: the integer atom index (in the group) of the first partner in the bond | ||
:param atom_index_two: the integer atom index (in the group) of the second partner in the bond | ||
:param bond_order: the integer bond order | ||
""" | ||
pass | ||
def set_inter_group_bond(self, atom_index_one, atom_index_two, bond_order): | ||
"""Add bonds between groups. | ||
:param atom_index_one: the integer atom index (in the structure) of the first partner in the bond | ||
:param atom_index_two: the integer atom index (in the structure) of the second partner in the bond | ||
:param bond_order the bond order | ||
""" | ||
pass | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
try: | ||
from mmtf import fetch, parse | ||
except ImportError: | ||
from Bio import MissingPythonDependencyError | ||
raise MissingPythonDependencyError("Install mmtf to use Bio.PDB.mmtf " | ||
"(e.g. pip install mmtf-python)") | ||
from Bio.PDB.mmtf.DefaultParser import StructureDecoder | ||
|
||
|
||
def get_from_decoded(decoder): | ||
structure_decoder = StructureDecoder() | ||
decoder.pass_data_on(structure_decoder) | ||
return structure_decoder.structure_bulder.get_structure() | ||
|
||
class MMTFParser(object): | ||
"""Class with static methods to get a BioPython structure from a url or a file.""" | ||
@staticmethod | ||
def get_structure_from_url(pdb_id): | ||
"""Get a structure from a URL - given a PDB id. | ||
:param pdb_id: the input PDB id | ||
:return the structure | ||
""" | ||
decoder = fetch(pdb_id) | ||
return get_from_decoded(decoder) | ||
|
||
@staticmethod | ||
def get_structure(file_path): | ||
"""Get a structrue from a file - given a file path. | ||
:param file_path: the input file path | ||
:return the structure | ||
""" | ||
decoder = parse(file_path) | ||
return get_from_decoded(decoder) |
Oops, something went wrong.