-
Notifications
You must be signed in to change notification settings - Fork 94
/
nbody_by_type_lib.py
471 lines (397 loc) · 22.7 KB
/
nbody_by_type_lib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
#!/usr/bin/env python
# Author: Andrew Jewett (jewett.aij at g mail)
# http://www.chem.ucsb.edu/~sheagroup
# License: 3-clause BSD License (See LICENSE.TXT)
# Copyright (c) 2012, Regents of the University of California
# All rights reserved.
import sys
from collections import defaultdict
#from collections import namedtuple
if sys.version < '2.7':
sys.stderr.write('--------------------------------------------------------\n'
'----------------- WARNING: OLD PYTHON VERSION ----------\n'
' This program is untested on your python version (' +
sys.version + ').\n'
' PLEASE LET ME KNOW IF THIS PROGRAM CRASHES (and upgrade python).\n'
' -Andrew 2013-10-25\n'
'--------------------------------------------------------\n'
'--------------------------------------------------------\n')
from ordereddict import OrderedDict
else:
from collections import OrderedDict
from collections import defaultdict
try:
from .nbody_graph_search import Ugraph, GraphMatcher
from .ttree_lex import MatchesPattern, MatchesAll, InputError
except (ImportError, SystemError, ValueError):
# not installed as a package
from nbody_graph_search import Ugraph, GraphMatcher
from ttree_lex import MatchesPattern, MatchesAll, InputError
#import gc
def GenInteractions_int(G_system,
g_bond_pattern,
typepattern_to_coefftypes,
canonical_order, # function to sort atoms and bonds
atomtypes_int2str,
bondtypes_int2str,
report_progress=False, # print messages to sys.stderr?
check_undefined_atomids_str = None):
"""
GenInteractions() automatically determines a list of interactions
present in a system of bonded atoms (argument "G_system"),
which satisfy the bond topology present in "g_bond_pattern", and
satisfy the atom and bond type requirements in "typepattern_to_coefftypes".
Whenever a set of atoms in "G_system" are bonded together in a way which
matches "g_bond_pattern", and when the atom and bond types is consistent
with one of the entries in "typepattern_to_coefftypes", the corresponding
list of atoms from G_system is appended to the list of results.
These results (the list of lists of atoms participating in an interaction)
are organized according their corresponding "coefftype", a string
which identifies the type of interaction they obey as explained above.
results are returned as a dictionary using "coefftype" as the lookup key.
Arguments:
-- typepattern_to_coefftypes is a list of 2-tuples --
The first element of the 2-tuple is the "typepattern".
It contains a string describing a list of atom types and bond types.
The typepattern is associated with a "coefftype",
which is the second element of the 2-tuple. This is a string
which identifies the type of interaction between the atoms.
Later on, this string can be used to lookup the force field
parameters for this interaction elsewhere.)
-- Arguments: G_system, g_bond_pattern, atomtypes_int2str, bondtypes_int2str --
G_system stores a list of atoms and bonds, and their attributes in
"Ugraph" format. In this format:
Atom ID numbers are represented by indices into the G_system.verts[] list.
Bond ID numbers are represented by indices into the G_system.edges[] list.
Atom types are represented as integers in the G_system.verts[i].attr list.
Bond types are represented as integers in the G_system.edges[i].attr list.
They are converted into strings using
atomtypes_int2str, and bondtypes_int2str.
g_bond_pattern is a graph which specifies the type of bonding between
the atoms required for a match. It is in Ugraph format (however the
atom and bond types are left blank.)
Atom and bond types are supplied by the user in string format. (These
strings typically encode integers, but could be any string in principle.)
The string-version of the ith atom type is stored in
atomtypes_int2str[ G_system.verts[i].attr ]
The string-version of the ith bond type is stored in
bondtypes_int2str[ G_system.edges[i].attr ]
-- The "canonical_order" argument: --
The search for atoms with a given bond pattern often yields
redundant matches. There is no difference for example
between the angle formed between three consecutively
bonded atoms (named, 1, 2, 3, for example), and the
angle between the same atoms in reverse order (3, 2, 1).
However both triplets of atoms will be returned by the subgraph-
matching algorithm when searching for ALL 3-body interactions.)
To eliminate this redundancy, the caller must supply a "canonical_order"
argument. This is a function which sorts the atoms and bonds in a way
which is consistent with the type of N-body interaction being considered.
The atoms (and bonds) in a candidate match are rearranged by the
canonical_order(). Then the re-ordered list of atom and bond ids is
tested against the list of atom/bond ids in the matches-found-so-far,
before it is added.
"""
if report_progress:
startatomid = 0
sys.stderr.write(' searching for matching bond patterns:\n')
sys.stderr.write(' 0%')
# Figure out which atoms from "G_system" bond together in a way which
# matches the "g_bond_pattern" argument. Organize these matches by
# atom and bond types and store all of the non-redundant ones in
# the "interactions_by_type" variable.
gm = GraphMatcher(G_system, g_bond_pattern)
interactions_by_type = defaultdict(list)
for atombondids in gm.Matches():
# "atombondids" is a tuple.
# atombondids[0] has atomIDs from G_system corresponding to g_bond_pattern
# (These atomID numbers are indices into the G_system.verts[] list.)
# atombondids[1] has bondIDs from G_system corresponding to g_bond_pattern
# (These bondID numbers are indices into the G_system.edges[] list.)
# It's convenient to organize the list of interactions-between-
# atoms in a dictionary indexed by atomtypes and bondtypes.
# (Because many atoms and bonds typically share the same type,
# organizing the results this way makes it faster to check
# whether a given interaction matches a "typepattern" defined
# by the user. We only have to check once for the whole group.)
atombondtypes = \
(tuple([G_system.GetVert(Iv).attr for Iv in atombondids[0]]),
tuple([G_system.GetEdge(Ie).attr for Ie in atombondids[1]]))
interactions_by_type[atombondtypes].append(atombondids)
if report_progress:
# GraphMatcher.Matches() searches for matches in an order
# that selects a different atomid number from G_system,
# starting at 0, and continuing up to the number of atoms (-1)
# in the system (G_system.nv-1), and using this as the first
# atom in the match (ie match[0][0]). This number can be used
# to guess much progress has been made so far.
oldatomid = startatomid
startatomid = atombondids[0][0]
percent_complete = (100 * startatomid) // G_system.GetNumVerts()
# report less often as more progress made
if percent_complete <= 4:
old_pc = (100 * oldatomid) // G_system.GetNumVerts()
if percent_complete > old_pc:
sys.stderr.write(' ' + str(percent_complete) + '%')
elif percent_complete <= 8:
pc_d2 = (100 * startatomid) // (2 * G_system.GetNumVerts())
oldpc_d2 = (100 * oldatomid) // (2 * G_system.GetNumVerts())
if pc_d2 > oldpc_d2:
sys.stderr.write(' ' + str(percent_complete) + '%')
elif percent_complete <= 20:
pc_d4 = (100 * startatomid) // (4 * G_system.GetNumVerts())
oldpc_d4 = (100 * oldatomid) // (4 * G_system.GetNumVerts())
if pc_d4 > oldpc_d4:
sys.stderr.write(' ' + str(percent_complete) + '%')
else:
pc_d10 = (100 * startatomid) // (10 * G_system.GetNumVerts())
oldpc_d10 = (100 * oldatomid) // (10 * G_system.GetNumVerts())
if pc_d10 > oldpc_d10:
sys.stderr.write(' ' + str(percent_complete) + '%')
if report_progress:
sys.stderr.write(' 100%\n')
#sys.stderr.write(' ...done\n')
#sys.stderr.write(' Looking up available atom and bond types...')
#coefftype_to_atomids = defaultdict(list)
#abids_to_coefftypes = defaultdict(list)
coefftype_to_atomids = OrderedDict()
abids_to_coefftypes = OrderedDict()
# -------------------- reporting progress -----------------------
if report_progress:
# The next interval of code is not technically necessary, but it makes
# the printed output easier to read by excluding irrelevant interactions
# Now, test each match to see if the atoms and bonds involved match
# any of the type-patterns in the "typepattern_to_coefftypes" argument.
types_atoms_all_str = set([])
types_bonds_all_str = set([])
for typepattern, coefftype in typepattern_to_coefftypes:
for atombondtypes, abidslist in interactions_by_type.items():
for Iv in atombondtypes[0]:
types_atoms_all_str.add(atomtypes_int2str[Iv])
for Ie in atombondtypes[1]:
types_bonds_all_str.add(bondtypes_int2str[Ie])
# ------------------ reporting progress (end) -------------------
# ------------------ check to make sure all interactions are defined ------
if check_undefined_atomids_str:
# Checking for missing interactions is a headache.
# Please excuse the messy code below.
atomids_matched = OrderedDict()
# Then loop through all the interactions (tuples of atoms) found by
# GraphMatcher, sort the atoms and store them in dictionary
# (atomids_matched) which keeps track of which interactions have
# been defined (ie have force-field parameters assigned to them).
# Initialize them to False, and update as interactions are found.
for atombondtypes, abidslist in interactions_by_type.items():
for abids in abidslist:
abids = canonical_order(abids)
atomids_int = tuple(abids[0])
# NOTE TO SELF:
# If in the future, different interactions (type_patterns) have
# different symmetries, and canonical_order() varies from
# interaction to interaction, then DONT loop over type_pattern:
# for type_pattern, coefftype in typepattern_to_coefftypes)
# abids = canonical_order(abids, type_pattern)
# Why: When checking for undefined interactions,
# we just want to make sure that SOME kind of interaction
# involving these atoms exists. The gruesome details of
# force-field symmetry should not enter into this.
# (We certainly don't want to require that different
# interactions are simultaneously present for the same set of
# atoms for ALL the possible different atom orderings for the
# different possible symmetries in the force field you are using
# Perhaps, in the future I should just use something like this:
# atomids_int = abids[0]
# atomids_int.sort()
# atomids_int = tuple(atomids_int)
# This would work for most molecules.
# I suppose that in some some bizarre molecules containing
# triangular or square cycles, for example, this would not
# distinguish all 3 angles in the triangle, for example.
# mistakenly thinking there was only one interaction there.
# But these cases are rare.)
if not atomids_int in atomids_matched:
atomids_matched[atomids_int] = False
# (Later on, we'll set some of these to True)
# ------------------ check to make sure all interactions are defined (end)
count = 0
for typepattern, coefftype in typepattern_to_coefftypes:
# ------------------ reporting progress -----------------------
# The next interval of code is not technically necessary, but it makes
# the printed output easier to read by excluding irrelevant
# interactions
if report_progress:
# Check to see if the atoms or bonds referred to in typepattern
# are (potentially) satisfied by any of the atoms present in the system.
# If any of the required atoms for this typepattern are not present
# in this system, then skip to the next typepattern.
atoms_available_Iv = [False for Iv in range(
0, g_bond_pattern.GetNumVerts())]
for Iv in range(0, g_bond_pattern.GetNumVerts()):
for type_atom_str in types_atoms_all_str:
if MatchesPattern(type_atom_str, typepattern[Iv]):
atoms_available_Iv[Iv] = True
atoms_available = True
for Iv in range(0, g_bond_pattern.GetNumVerts()):
if not atoms_available_Iv[Iv]:
atoms_available = False
bonds_available_Ie = [False for Ie in range(
0, g_bond_pattern.GetNumEdges())]
for Ie in range(0, g_bond_pattern.GetNumEdges()):
for type_bond_str in types_bonds_all_str:
if MatchesPattern(type_bond_str,
typepattern[g_bond_pattern.GetNumVerts() + Ie]):
bonds_available_Ie[Ie] = True
bonds_available = True
for Ie in range(0, g_bond_pattern.GetNumEdges()):
if not bonds_available_Ie[Ie]:
bonds_available = False
if atoms_available and bonds_available:
# Explanation:
# (Again) only if ALL of the atoms and bond requirements for
# this type pattern are satisfied by at least SOME of the atoms
# present in the this system, ...THEN print a status message.
# (Because for complex all-atom force-fields, the number of
# possible atom types, and typepatterns far exceeds the number
# of atom types typically present in the system. Otherwise
# hundreds of kB of irrelevant information can be printed.)
sys.stderr.write(' checking ' + coefftype + ' type requirements:'
#' (atom-types,bond-types) '
'\n ' + str(typepattern) + '\n')
# ------------------ reporting progress (end) -------------------
for atombondtypes, abidslist in interactions_by_type.items():
# express atom & bond types in a tuple of the original string
# format
types_atoms = [atomtypes_int2str[Iv] for Iv in atombondtypes[0]]
types_bonds = [bondtypes_int2str[Ie] for Ie in atombondtypes[1]]
type_strings = types_atoms + types_bonds
# use string comparisons to check for a match with typepattern
if MatchesAll(type_strings, typepattern): # <-see "ttree_lex.py"
for abids in abidslist:
# Re-order the atoms (and bonds) in a "canonical" way.
# Only add new interactions to the list after re-ordering
# them and checking that they have not been added earlier.
# (...well not when using the same coefftype at least.
# This prevents the same triplet of atoms from
# being used to calculate the bond-angle twice:
# once for 1-2-3 and 3-2-1, for example.)
abids = canonical_order(abids)
redundant = False
if abids in abids_to_coefftypes:
coefftypes = abids_to_coefftypes[abids]
if coefftype in coefftypes:
redundant = True
if check_undefined_atomids_str:
atomids_int = tuple(abids[0])
atomids_matched[atomids_int] = True
if not redundant:
# (It's too bad python does not
# have an Ordered defaultdict)
if coefftype in coefftype_to_atomids:
coefftype_to_atomids[coefftype].append(abids[0])
else:
coefftype_to_atomids[coefftype] = [abids[0]]
if abids in abids_to_coefftypes:
abids_to_coefftypes[abids].append(coefftype)
else:
abids_to_coefftypes[abids] = [coefftype]
count += 1
if report_progress:
sys.stderr.write(' (found ' +
str(count) + ' non-redundant matches)\n')
if check_undefined_atomids_str:
for atomids_int, found_match in atomids_matched.items():
if not found_match:
atomids_str = [check_undefined_atomids_str[Iv]
for Iv in atomids_int]
raise InputError('Error: A bonded interaction should exist between atoms:\n' +
' ' +
(',\n '.join(atomids_str)) + '\n' +
' ...however no interaction between these types of atoms has been defined\n' +
' (If you are using a force field, then it probably means that you made a\n'
' mistake choosing at least one of these two @atom types from the list\n'
' of available atom types supplied by the force field. To fix it, edit\n'
' the corresponding lines in the "Data Atoms" section of your LT file.\n'
' If this is not the case, then you can override this error message by\n'
' invoking moltemplate.sh without the \"-checkff\" argument.)\n')
return coefftype_to_atomids
def GenInteractions_str(bond_pairs,
g_bond_pattern,
typepattern_to_coefftypes,
canonical_order, # function to sort atoms and bonds
atomids_str,
atomtypes_str,
bondids_str,
bondtypes_str,
report_progress=False, # print messages to sys.stderr?
check_undefined=False):
assert(len(atomids_str) == len(atomtypes_str))
assert(len(bondids_str) == len(bondtypes_str))
# The atomids and atomtypes and bondtypes are strings.
# First we assign a unique integer id to each string.
atomids_str2int = {}
atomtypes_str2int = {}
atomtypes_int2str = []
atomtype_int = 0
for i in range(0, len(atomids_str)):
if atomids_str[i] in atomids_str2int:
raise InputError('Error: multiple atoms have the same id (' +
str(atomids_str[i]) + ')')
atomids_str2int[atomids_str[i]] = i
#atomtypes_int = len(atomtypes_int)+1
if (not (atomtypes_str[i] in atomtypes_str2int)):
atomtypes_str2int[atomtypes_str[i]] = atomtype_int
atomtypes_int2str.append(atomtypes_str[i])
atomtype_int += 1
# atomtypes_int.append(atomtype_int)
bondids_str2int = {}
bondtypes_str2int = {}
bondtypes_int2str = []
bondtype_int = 0
for i in range(0, len(bondids_str)):
if bondids_str[i] in bondids_str2int:
raise InputError('Error: multiple bonds have the same id (' +
str(bondids_str[i]) + ')')
bondids_str2int[bondids_str[i]] = i
#bondtype_int = len(bondtypes_int)+1
if (not (bondtypes_str[i] in bondtypes_str2int)):
bondtypes_str2int[bondtypes_str[i]] = bondtype_int
bondtypes_int2str.append(bondtypes_str[i])
bondtype_int += 1
# Now convert "bond_pairs" into the UGraph format
G_system = Ugraph()
for iv in range(0, len(atomtypes_str)):
G_system.AddVertex(iv, atomtypes_str2int[atomtypes_str[iv]])
for ie in range(0, len(bond_pairs)):
atomid1_str = bond_pairs[ie][0]
atomid2_str = bond_pairs[ie][1]
if (atomid1_str not in atomids_str2int):
raise InputError('Error in Bonds Section:\n'
' ' + atomid1_str + ' is not defined in Atoms section\n')
if (atomid2_str not in atomids_str2int):
raise InputError('Error in Bonds Section:\n'
' ' + atomid2_str + ' is not defined in Atoms section\n')
G_system.AddEdge(atomids_str2int[atomid1_str],
atomids_str2int[atomid2_str],
bondtypes_str2int[bondtypes_str[ie]])
coefftype_to_atomids_int = GenInteractions_int(G_system,
g_bond_pattern,
typepattern_to_coefftypes,
canonical_order,
atomtypes_int2str,
bondtypes_int2str,
report_progress,
(atomids_str if check_undefined else None))
coefftype_to_atomids_str = OrderedDict()
for coefftype, atomidss_int in coefftype_to_atomids_int.items():
if report_progress:
sys.stderr.write(' processing coefftype: ' +
str(coefftype) + '\n')
for atomids_int in atomidss_int:
if coefftype in coefftype_to_atomids_str:
coefftype_to_atomids_str[coefftype].append(
[atomids_str[iv] for iv in atomids_int])
else:
coefftype_to_atomids_str[coefftype] = \
[[atomids_str[iv] for iv in atomids_int]]
# gc.collect()
return coefftype_to_atomids_str