Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Merge pull request #36 from lennax/MMCIFModels

Fix: Multiple MMCIF models: store real id as serial
  • Loading branch information...
commit 7b02facb4abebf26dc7d1896dd522dee97086a94 2 parents a9eb35a + e0dd628
@etal etal authored
Showing with 39 additions and 11 deletions.
  1. +29 −7 Bio/PDB/MMCIFParser.py
  2. +10 −4 Tests/test_MMCIF.py
View
36 Bio/PDB/MMCIFParser.py
@@ -11,6 +11,8 @@
from Bio.PDB.MMCIF2Dict import MMCIF2Dict
from Bio.PDB.StructureBuilder import StructureBuilder
+from Bio.PDB.PDBExceptions import \
+ PDBConstructionException, PDBConstructionWarning
class MMCIFParser(object):
@@ -24,7 +26,10 @@ def _build_structure(self, structure_id):
mmcif_dict=self._mmcif_dict
atom_id_list=mmcif_dict["_atom_site.label_atom_id"]
residue_id_list=mmcif_dict["_atom_site.label_comp_id"]
- element_list = mmcif_dict["_atom_site.type_symbol"]
+ try:
+ element_list = mmcif_dict["_atom_site.type_symbol"]
+ except KeyError:
+ element_list = None
seq_id_list=mmcif_dict["_atom_site.label_seq_id"]
chain_id_list=mmcif_dict["_atom_site.label_asym_id"]
x_list=map(float, mmcif_dict["_atom_site.Cartn_x"])
@@ -34,7 +39,14 @@ def _build_structure(self, structure_id):
b_factor_list=mmcif_dict["_atom_site.B_iso_or_equiv"]
occupancy_list=mmcif_dict["_atom_site.occupancy"]
fieldname_list=mmcif_dict["_atom_site.group_PDB"]
- model_list = mmcif_dict["_atom_site.pdbx_PDB_model_num"]
+ try:
+ serial_list = [int(n) for n in mmcif_dict["_atom_site.pdbx_PDB_model_num"]]
+ except KeyError:
+ # No model number column
+ serial_list = None
+ except ValueError:
+ # Invalid model number (malformed file)
+ raise PDBConstructionException("Invalid model number")
try:
aniso_u11=mmcif_dict["_atom_site.aniso_U[1][1]"]
aniso_u12=mmcif_dict["_atom_site.aniso_U[1][2]"]
@@ -58,7 +70,10 @@ def _build_structure(self, structure_id):
structure_builder=self._structure_builder
structure_builder.init_structure(structure_id)
structure_builder.init_seg(" ")
- current_model_id = -1
+ # Historically, Biopython PDB parser uses model_id to mean array index
+ # so serial_id means the Model ID specified in the file
+ current_model_id = 0
+ current_serial_id = 0
for i in xrange(0, len(atom_id_list)):
x=x_list[i]
y=y_list[i]
@@ -77,9 +92,16 @@ def _build_structure(self, structure_id):
hetatm_flag="H"
else:
hetatm_flag=" "
- model_id = model_list[i]
- if current_model_id != model_id:
- current_model_id = model_id
+ if serial_list is not None:
+ # model column exists; use it
+ serial_id = serial_list[i]
+ if current_serial_id != serial_id:
+ # if serial changes, update it and start new model
+ current_serial_id = serial_id
+ structure_builder.init_model(current_model_id, current_serial_id)
+ current_model_id += 1
+ else:
+ # no explicit model column; initialize single model
structure_builder.init_model(current_model_id)
if current_chain_id!=chainid:
current_chain_id=chainid
@@ -94,7 +116,7 @@ def _build_structure(self, structure_id):
structure_builder.init_residue(resname, hetatm_flag, int_resseq,
icode)
coord=numpy.array((x, y, z), 'f')
- element = element_list[i]
+ element = element_list[i] if element_list else None
structure_builder.init_atom(name, coord, tempfactor, occupancy, altloc,
name, element=element)
if aniso_flag==1:
View
14 Tests/test_MMCIF.py
@@ -46,8 +46,10 @@ def test_parser(self):
self.assertEqual(len(structure), 1)
for ppbuild in [PPBuilder(), CaPPBuilder()]:
#==========================================================
+ # Check that serial_num (model column) is stored properly
+ self.assertEqual(structure[0].serial_num, 1)
#First try allowing non-standard amino acids,
- polypeptides = ppbuild.build_peptides(structure['1'], False)
+ polypeptides = ppbuild.build_peptides(structure[0], False)
self.assertEqual(len(polypeptides), 1)
pp = polypeptides[0]
# Check the start and end positions
@@ -64,7 +66,7 @@ def test_parser(self):
#Now try strict version with only standard amino acids
#Should ignore MSE 151 at start, and then break the chain
#at MSE 185, and MSE 214,215
- polypeptides = ppbuild.build_peptides(structure['1'], True)
+ polypeptides = ppbuild.build_peptides(structure[0], True)
self.assertEqual(len(polypeptides), 3)
#First fragment
pp = polypeptides[0]
@@ -98,8 +100,12 @@ def testModels(self):
self.assertEqual(len(structure), 3)
for ppbuild in [PPBuilder(), CaPPBuilder()]:
#==========================================================
+ # Check that serial_num (model column) is stored properly
+ self.assertEqual(structure[0].serial_num, 1)
+ self.assertEqual(structure[1].serial_num, 2)
+ self.assertEqual(structure[2].serial_num, 3)
#First try allowing non-standard amino acids,
- polypeptides = ppbuild.build_peptides(structure['1'], False)
+ polypeptides = ppbuild.build_peptides(structure[0], False)
self.assertEqual(len(polypeptides), 1)
pp = polypeptides[0]
# Check the start and end positions
@@ -114,7 +120,7 @@ def testModels(self):
str(s))
#==========================================================
#Now try strict version with only standard amino acids
- polypeptides = ppbuild.build_peptides(structure['1'], True)
+ polypeptides = ppbuild.build_peptides(structure[0], True)
self.assertEqual(len(polypeptides), 1)
pp = polypeptides[0]
# Check the start and end positions
Please sign in to comment.
Something went wrong with that request. Please try again.