forked from biopython/biopython
-
Notifications
You must be signed in to change notification settings - Fork 0
/
MMCIFParser.py
180 lines (166 loc) · 7.25 KB
/
MMCIFParser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk)
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""mmCIF parser"""
from string import ascii_letters
import numpy
from Bio.PDB.MMCIF2Dict import MMCIF2Dict
from Bio.PDB.StructureBuilder import StructureBuilder
from Bio.PDB.PDBExceptions import PDBConstructionException
class MMCIFParser(object):
def get_structure(self, structure_id, filename):
self._mmcif_dict=MMCIF2Dict(filename)
self._structure_builder=StructureBuilder()
self._build_structure(structure_id)
return self._structure_builder.get_structure()
def _build_structure(self, structure_id):
mmcif_dict=self._mmcif_dict
atom_id_list=mmcif_dict["_atom_site.label_atom_id"]
residue_id_list=mmcif_dict["_atom_site.label_comp_id"]
try:
element_list = mmcif_dict["_atom_site.type_symbol"]
except KeyError:
element_list = None
seq_id_list=mmcif_dict["_atom_site.label_seq_id"]
chain_id_list=mmcif_dict["_atom_site.label_asym_id"]
x_list=map(float, mmcif_dict["_atom_site.Cartn_x"])
y_list=map(float, mmcif_dict["_atom_site.Cartn_y"])
z_list=map(float, mmcif_dict["_atom_site.Cartn_z"])
alt_list=mmcif_dict["_atom_site.label_alt_id"]
b_factor_list=mmcif_dict["_atom_site.B_iso_or_equiv"]
occupancy_list=mmcif_dict["_atom_site.occupancy"]
fieldname_list=mmcif_dict["_atom_site.group_PDB"]
try:
serial_list = [int(n) for n in mmcif_dict["_atom_site.pdbx_PDB_model_num"]]
except KeyError:
# No model number column
serial_list = None
except ValueError:
# Invalid model number (malformed file)
raise PDBConstructionException("Invalid model number")
try:
aniso_u11=mmcif_dict["_atom_site.aniso_U[1][1]"]
aniso_u12=mmcif_dict["_atom_site.aniso_U[1][2]"]
aniso_u13=mmcif_dict["_atom_site.aniso_U[1][3]"]
aniso_u22=mmcif_dict["_atom_site.aniso_U[2][2]"]
aniso_u23=mmcif_dict["_atom_site.aniso_U[2][3]"]
aniso_u33=mmcif_dict["_atom_site.aniso_U[3][3]"]
aniso_flag=1
except KeyError:
# no anisotropic B factors
aniso_flag=0
# if auth_seq_id is present, we use this.
# Otherwise label_seq_id is used.
if "_atom_site.auth_seq_id" in mmcif_dict:
seq_id_list=mmcif_dict["_atom_site.auth_seq_id"]
else:
seq_id_list=mmcif_dict["_atom_site.label_seq_id"]
# Now loop over atoms and build the structure
current_chain_id=None
current_residue_id=None
structure_builder=self._structure_builder
structure_builder.init_structure(structure_id)
structure_builder.init_seg(" ")
# Historically, Biopython PDB parser uses model_id to mean array index
# so serial_id means the Model ID specified in the file
current_model_id = 0
current_serial_id = 0
for i in xrange(0, len(atom_id_list)):
x=x_list[i]
y=y_list[i]
z=z_list[i]
resname=residue_id_list[i]
chainid=chain_id_list[i]
altloc=alt_list[i]
if altloc==".":
altloc=" "
resseq=seq_id_list[i]
name=atom_id_list[i]
# occupancy & B factor
try:
tempfactor=float(b_factor_list[i])
except ValueError:
raise PDBConstructionException("Invalid or missing B factor")
try:
occupancy=float(occupancy_list[i])
except ValueError:
raise PDBConstructionException("Invalid or missing occupancy")
fieldname=fieldname_list[i]
if fieldname=="HETATM":
hetatm_flag="H"
else:
hetatm_flag=" "
if serial_list is not None:
# model column exists; use it
serial_id = serial_list[i]
if current_serial_id != serial_id:
# if serial changes, update it and start new model
current_serial_id = serial_id
structure_builder.init_model(current_model_id, current_serial_id)
current_model_id += 1
else:
# no explicit model column; initialize single model
structure_builder.init_model(current_model_id)
if current_chain_id!=chainid:
current_chain_id=chainid
structure_builder.init_chain(current_chain_id)
current_residue_id=resseq
icode, int_resseq=self._get_icode(resseq)
structure_builder.init_residue(resname, hetatm_flag, int_resseq,
icode)
elif current_residue_id!=resseq:
current_residue_id=resseq
icode, int_resseq=self._get_icode(resseq)
structure_builder.init_residue(resname, hetatm_flag, int_resseq,
icode)
coord=numpy.array((x, y, z), 'f')
element = element_list[i] if element_list else None
structure_builder.init_atom(name, coord, tempfactor, occupancy, altloc,
name, element=element)
if aniso_flag==1:
u=(aniso_u11[i], aniso_u12[i], aniso_u13[i],
aniso_u22[i], aniso_u23[i], aniso_u33[i])
mapped_anisou=map(float, u)
anisou_array=numpy.array(mapped_anisou, 'f')
structure_builder.set_anisou(anisou_array)
# Now try to set the cell
try:
a=float(mmcif_dict["_cell.length_a"])
b=float(mmcif_dict["_cell.length_b"])
c=float(mmcif_dict["_cell.length_c"])
alpha=float(mmcif_dict["_cell.angle_alpha"])
beta=float(mmcif_dict["_cell.angle_beta"])
gamma=float(mmcif_dict["_cell.angle_gamma"])
cell=numpy.array((a, b, c, alpha, beta, gamma), 'f')
spacegroup=mmcif_dict["_symmetry.space_group_name_H-M"]
spacegroup=spacegroup[1:-1] # get rid of quotes!!
if spacegroup is None:
raise Exception
structure_builder.set_symmetry(spacegroup, cell)
except:
pass # no cell found, so just ignore
def _get_icode(self, resseq):
"""Tries to return the icode. In MMCIF files this is just part of
resseq! In PDB files, it's a separate field."""
last_resseq_char=resseq[-1]
if last_resseq_char in ascii_letters:
icode=last_resseq_char
int_resseq=int(resseq[0:-1])
else:
icode=" "
int_resseq=int(resseq)
return icode, int_resseq
if __name__=="__main__":
import sys
if len(sys.argv) != 2:
print "Usage: python MMCIFparser.py filename"
raise SystemExit
filename=sys.argv[1]
p=MMCIFParser()
structure=p.get_structure("test", filename)
for model in structure.get_list():
print model
for chain in model.get_list():
print chain
print "Found %d residues." % len(chain.get_list())