Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

185 lines (168 sloc) 7.535 kb
# Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk)
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""mmCIF parser"""
from __future__ import print_function
from string import ascii_letters
import numpy
from Bio._py3k import range
from Bio.PDB.MMCIF2Dict import MMCIF2Dict
from Bio.PDB.StructureBuilder import StructureBuilder
from Bio.PDB.PDBExceptions import PDBConstructionException
class MMCIFParser(object):
def get_structure(self, structure_id, filename):
self._mmcif_dict=MMCIF2Dict(filename)
self._structure_builder=StructureBuilder()
self._build_structure(structure_id)
return self._structure_builder.get_structure()
def _build_structure(self, structure_id):
mmcif_dict=self._mmcif_dict
atom_id_list=mmcif_dict["_atom_site.label_atom_id"]
residue_id_list=mmcif_dict["_atom_site.label_comp_id"]
try:
element_list = mmcif_dict["_atom_site.type_symbol"]
except KeyError:
element_list = None
seq_id_list=mmcif_dict["_atom_site.label_seq_id"]
chain_id_list=mmcif_dict["_atom_site.label_asym_id"]
x_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_x"]]
y_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_y"]]
z_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_z"]]
alt_list=mmcif_dict["_atom_site.label_alt_id"]
b_factor_list=mmcif_dict["_atom_site.B_iso_or_equiv"]
occupancy_list=mmcif_dict["_atom_site.occupancy"]
fieldname_list=mmcif_dict["_atom_site.group_PDB"]
try:
serial_list = [int(n) for n in mmcif_dict["_atom_site.pdbx_PDB_model_num"]]
except KeyError:
# No model number column
serial_list = None
except ValueError:
# Invalid model number (malformed file)
raise PDBConstructionException("Invalid model number")
try:
aniso_u11=mmcif_dict["_atom_site.aniso_U[1][1]"]
aniso_u12=mmcif_dict["_atom_site.aniso_U[1][2]"]
aniso_u13=mmcif_dict["_atom_site.aniso_U[1][3]"]
aniso_u22=mmcif_dict["_atom_site.aniso_U[2][2]"]
aniso_u23=mmcif_dict["_atom_site.aniso_U[2][3]"]
aniso_u33=mmcif_dict["_atom_site.aniso_U[3][3]"]
aniso_flag=1
except KeyError:
# no anisotropic B factors
aniso_flag=0
# if auth_seq_id is present, we use this.
# Otherwise label_seq_id is used.
if "_atom_site.auth_seq_id" in mmcif_dict:
seq_id_list=mmcif_dict["_atom_site.auth_seq_id"]
else:
seq_id_list=mmcif_dict["_atom_site.label_seq_id"]
# Now loop over atoms and build the structure
current_chain_id=None
current_residue_id=None
structure_builder=self._structure_builder
structure_builder.init_structure(structure_id)
structure_builder.init_seg(" ")
# Historically, Biopython PDB parser uses model_id to mean array index
# so serial_id means the Model ID specified in the file
current_model_id = 0
current_serial_id = 0
for i in range(0, len(atom_id_list)):
x=x_list[i]
y=y_list[i]
z=z_list[i]
resname=residue_id_list[i]
chainid=chain_id_list[i]
altloc=alt_list[i]
if altloc==".":
altloc=" "
resseq=seq_id_list[i]
name=atom_id_list[i]
# occupancy & B factor
try:
tempfactor=float(b_factor_list[i])
except ValueError:
raise PDBConstructionException("Invalid or missing B factor")
try:
occupancy=float(occupancy_list[i])
except ValueError:
raise PDBConstructionException("Invalid or missing occupancy")
fieldname=fieldname_list[i]
if fieldname=="HETATM":
hetatm_flag="H"
else:
hetatm_flag=" "
if serial_list is not None:
# model column exists; use it
serial_id = serial_list[i]
if current_serial_id != serial_id:
# if serial changes, update it and start new model
current_serial_id = serial_id
structure_builder.init_model(current_model_id, current_serial_id)
current_model_id += 1
else:
# no explicit model column; initialize single model
structure_builder.init_model(current_model_id)
if current_chain_id!=chainid:
current_chain_id=chainid
structure_builder.init_chain(current_chain_id)
current_residue_id=resseq
icode, int_resseq=self._get_icode(resseq)
structure_builder.init_residue(resname, hetatm_flag, int_resseq,
icode)
elif current_residue_id!=resseq:
current_residue_id=resseq
icode, int_resseq=self._get_icode(resseq)
structure_builder.init_residue(resname, hetatm_flag, int_resseq,
icode)
coord=numpy.array((x, y, z), 'f')
element = element_list[i] if element_list else None
structure_builder.init_atom(name, coord, tempfactor, occupancy, altloc,
name, element=element)
if aniso_flag==1:
u=(aniso_u11[i], aniso_u12[i], aniso_u13[i],
aniso_u22[i], aniso_u23[i], aniso_u33[i])
mapped_anisou = [float(x) for x in u]
anisou_array=numpy.array(mapped_anisou, 'f')
structure_builder.set_anisou(anisou_array)
# Now try to set the cell
try:
a=float(mmcif_dict["_cell.length_a"])
b=float(mmcif_dict["_cell.length_b"])
c=float(mmcif_dict["_cell.length_c"])
alpha=float(mmcif_dict["_cell.angle_alpha"])
beta=float(mmcif_dict["_cell.angle_beta"])
gamma=float(mmcif_dict["_cell.angle_gamma"])
cell=numpy.array((a, b, c, alpha, beta, gamma), 'f')
spacegroup=mmcif_dict["_symmetry.space_group_name_H-M"]
spacegroup=spacegroup[1:-1] # get rid of quotes!!
if spacegroup is None:
raise Exception
structure_builder.set_symmetry(spacegroup, cell)
except:
pass # no cell found, so just ignore
def _get_icode(self, resseq):
"""Tries to return the icode. In MMCIF files this is just part of
resseq! In PDB files, it's a separate field."""
last_resseq_char=resseq[-1]
if last_resseq_char in ascii_letters:
icode=last_resseq_char
int_resseq=int(resseq[0:-1])
else:
icode=" "
int_resseq=int(resseq)
return icode, int_resseq
if __name__=="__main__":
import sys
if len(sys.argv) != 2:
print("Usage: python MMCIFparser.py filename")
raise SystemExit
filename=sys.argv[1]
p=MMCIFParser()
structure=p.get_structure("test", filename)
for model in structure.get_list():
print(model)
for chain in model.get_list():
print(chain)
print("Found %d residues." % len(chain.get_list()))
Jump to Line
Something went wrong with that request. Please try again.