-
Notifications
You must be signed in to change notification settings - Fork 28
/
merge.py
executable file
·1096 lines (999 loc) · 40.3 KB
/
merge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Takes *.mae structure files and merges them.
The input *.mae files contain information pertinent to merging. This information
is stored as Schrödinger structure properties and is described below. This
information is manually entered by editing the *.mae files.
Structure Properties
--------------------
s_cs_pattern - SMILES used to pick out the matching atoms. This will
actually accept any property containing "pattern".
b_cs_first_match_only - SMILES can often be matched in two directions, say
atoms 1-2-3 or 3-2-1 (ex. P-[Rh]-P is a palindrome).
If this is false (0), it merges the structures using
both palindromes. If it's true (1), it only uses
whichever direction it matches first.
b_cs_substructure - If true, use `evaluate_substructure` to find atom
indices from the pattern, else use `evaluate_smarts`.
b_cs_both_enantionmers - If true, will also use the other enantiomer of this
structure. It does this by simply inverting all of the
x coordinates for the atoms.
b_cs_full_match_only - If the pattern uses optional atoms, and some of those
optional atoms don't match (return atom number 0), then
don't include those matches.
Atom Properties
---------------
Fill in later. For now, refer to setup_com_from_mae and setup_mae_from_com.
Bond Properties
---------------
See atom properties.
"""
# This whole script could be made a lot more efficient by creating a class
# containing struct_1 and struct_2 and creating some sort of dictionary to look
# up the atom indices in the merged structure. This whole process is repeated
# too often.
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division
import argparse
import copy
import os
import re
import sys
from schrodinger import structure as sch_struct
from schrodinger.structutils import analyze, measure, rmsd, build
ATOMS_TO_MOVE = ['RU','IR','RH','D1']
DEBUG = False
def return_parser():
"""
Parser for merge.
"""
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument(
'-g', '--group',
type=str, nargs='+', action='append',
help='Groups of structures to merge.')
parser.add_argument(
'-o', '--output',
type=str,
help='Write all output structures to one file.')
parser.add_argument(
'-d', '--directory',
type=str,
help='Write all output structures individually to this directory.')
parser.add_argument(
'-m', '--mini',
action='store_true',
help='Attempt to minimize merged structures using MacroModel and '
'MM3*. This is a completely unrestrained minimization with the final, '
'completed, merged structures. Several other optimizations will take '
'place during the merging procedure regardless of whether this option '
'is used. I have found that this option can sometimes be harmful. Use '
'it at your own risk. It does have potential though. Just requires '
'some more testing before I\'ll recommend it.')
return parser
def main(opts):
"""
Main for merge.
Returns
-------
list of Schrodinger structures
"""
structures = merge_many_filenames(opts.group)
print('-' * 80)
print('END NUMBER STRUCTURES: {}'.format(len(structures)))
if opts.mini:
structures = mini(structures)
new_structures = []
for structure in structures:
new_structures.append(add_chirality(structure))
structures = new_structures
# All output below.
# Write structures to a single file.
if opts.output:
print('OUTPUT FILE: {}'.format(opts.output))
sch_writer = sch_struct.StructureWriter(opts.output)
sch_writer.extend(structures)
sch_writer.close()
# Write structures to a directory.
if opts.directory:
print('OUTPUT DIRECTORY: {}'.format(opts.directory))
for structure in structures:
path = make_unique_filename(
os.path.join(
opts.directory,
structure.property['s_m_title'] + '.mae'))
filename = os.path.basename(path)
print(' * WRITING : {}'.format(filename))
sch_writer = sch_struct.StructureWriter(path)
sch_writer.append(structure)
sch_writer.close()
return structures
def merge_many_filenames(list_of_lists):
"""
Returns merged structures for a list of lists of filenames.
Arguments
---------
list_of_lists : list of lists of filenames of *.mae
"""
# Setup list for first group of filenames/structures.
structures = []
for filename in list_of_lists[0]:
structures.extend(read_filename(filename))
print('TOTAL NUM. STRUCTURES: {}'.format(len(structures)))
# Iterate over groups of filenames/structures.
for filenames in list_of_lists[1:]:
new_structures = []
for filename in filenames:
new_structures.extend(read_filename(filename))
# Update existing list of structures after combining with the new
# structures.
structures = list(merge_many_structures(structures, new_structures))
print('TOTAL NUM. STRUCTURES: {}'.format(len(structures)))
return list(structures)
def read_filename(filename):
"""
Just helps with the logging.
Arguments
---------
filename : string
Returns
-------
list of Schrodinger structure objects
"""
print('>>> filename: {}'.format(filename))
structures = []
sch_reader = sch_struct.StructureReader(filename)
for structure in sch_reader:
for enantiomer in load_enantiomers(structure):
structures.append(enantiomer)
sch_reader.close()
print('{} : {} structures (including enantiomers)'.format(
filename, len(structures)))
return structures
def load_enantiomers(structure):
"""
Generator to yield both enantiomers of a structure.
Yields both enantiomers depending on the property b_cs_both_enantiomers.
Enantiomers are generated by inversing the x coordinate of every atom.
b_cs_both_enantiomers
* True - Yield input structure and enantiomer
* False - Yield input structure
Arguments
---------
structure : Schrödinger structure object
Yields
------
Schrödinger structure objects
"""
# I dunno if I like this style. Seems to work though.
yield structure
if structure.property.get('b_cs_both_enantiomers', False):
print(' - LOADING OTHER ENANTIOMER: {}'.format(
structure.property['s_m_title']))
other_enantiomer = copy.deepcopy(structure)
for coords in other_enantiomer.getXYZ(copy=False):
coords[0] = -coords[0]
structures = [other_enantiomer]
yield structures[0]
def merge_many_structures(structures_1, structures_2):
"""
Iterator for combining two lists of structures.
Arguments
---------
structures_1 : list of Schrodinger structures
structures_2 : list of Schrodinger structures
Yields
------
Schrodinger structure
"""
for structure_1 in structures_1:
for structure_2 in structures_2:
for structure in merge(structure_1, structure_2):
yield structure
def merge(struct_1, struct_2):
"""
Takes two Schrödinger structures and combines them.
Uses structure properties containing the string "pattern" to determine which
atoms to overlap. If there are multiple pattern matches, it will try to use
all of them.
Arguments
---------
struct_1 : Schrödinger structure object
struct_2 : Schrödinger structure object
Yields
------
Schrödinger structure objects
"""
# Determine the structures that overlap.
match_1s, match_2s = get_overlapping_atoms_in_both(struct_1, struct_2)
print('MATCHES FROM STRUCTURE 1: {}'.format(match_1s))
print('MATCHES FROM STRUCTURE 2: {}'.format(match_2s))
seen = set()
for match_1 in match_1s:
# Eliminate duplicates 1st.
for match_2 in match_2s:
print('-' * 80)
# Remove the zero's if they exist.
new_match_1, new_match_2 = remove_index_from_both_if_equals_zero(
match_1, match_2)
print('TRIMMED: {} {}'.format(new_match_1, new_match_2))
tup = tuple(new_match_2)
if tup not in seen:
seen.add(tup)
if struct_2.property.get('b_cs_first_match_only', False):
seen.add(tup[::-1])
print(' - Unique! Continuing.')
# Just to look good.
print('-' * 80)
print(' * ALIGNING:')
print(' * {:<30} {} {}'.format(
struct_1.title,
new_match_1,
[struct_1.atom[x].atom_type_name for x in new_match_1]))
print(' * {:<30} {} {}'.format(
struct_2.title,
new_match_2,
[struct_2.atom[x].atom_type_name for x in new_match_2]))
# Real work below.
rmsd.superimpose(struct_1, new_match_1, struct_2, new_match_2)
yield merge_structures_from_matching_atoms(
struct_1, new_match_1, struct_2, new_match_2)
else:
print(' - Not unique! Skipping.')
def get_overlapping_atoms_in_both(struct_1, struct_2):
"""
Uses properties stored inside the 2nd structure to locate a set or sets of
matching atoms inside both structures.
This will use all patterns that are located. As long as the structure
property contains the string "pattern", it will attempt to locate those
atoms. As an example, I frequently employ `s_cs_pattern`, although this
could be extended to `s_cs_pattern_1`, `s_cs_pattern_2`, etc.
Arguments
---------
struct_1 : Schrödinger structure object
struct_2 : Schrödinger structure object
Returns
-------
match_struct_1 : list of list of integers
match_struct_2 : list of list of integers
"""
# Use the patterns from struct_2.
patterns = list(search_dic_keys(struct_2.property, 'pattern'))
# Determine whether to use analyze.evaluate_smarts or
# analyze.evaluate_substructure from struct_2 (this needs to match the
# pattern from struct_2).
use_substructure = struct_2.property.get('b_cs_use_substructure', False)
print(' * PATTERNS: {}'.format(patterns))
for pattern in patterns:
print(' * CHECKING: {}'.format(pattern))
match_struct_1 = get_atom_numbers_from_structure_with_pattern(
struct_1,
pattern,
first_match_only=struct_1.property.get(
'b_cs_first_match_only', False),
use_substructure=use_substructure)
if match_struct_1:
print(' * FOUND IN: {}'.format(struct_1.title))
match_struct_2 = get_atom_numbers_from_structure_with_pattern(
struct_2,
pattern,
first_match_only=struct_2.property.get(
'b_cs_first_match_only', False),
use_substructure=use_substructure)
if match_struct_2:
print(' * FOUND IN: {}'.format(struct_2.title))
break
else:
print(' * COULDN\'T FIND IN: {}'.format(struct_2.title))
continue
# This is an interesting way to ensure we have actually found something for
# match_struct_1 and match_struct_2.
try:
match_struct_1
match_struct_2
except UnboundLocalError as e:
print('ERROR: {} {}'.format(
struct_1.property['s_m_title'],
struct_2.property['s_m_title']))
raise e
# 1.) Eliminate all with zero if b_cs_full_match_only.
print('>>> match_struct_1: {}'.format(match_struct_1))
print('>>> match_struct_2: {}'.format(match_struct_2))
if struct_1.property.get('b_cs_full_match_only', False):
new_match_struct_1 = []
for match in match_struct_1:
if 0 not in match:
new_match_struct_1.append(match)
else:
new_match_struct_1 = match_struct_1
if struct_2.property.get('b_cs_full_match_only', False):
new_match_struct_2 = []
for match in match_struct_2:
if 0 not in match:
new_match_struct_2.append(match)
else:
new_match_struct_2 = match_struct_2
print('>>> new_match_struct_1: {}'.format(new_match_struct_1))
print('>>> new_match_struct_2: {}'.format(new_match_struct_2))
# Sometimes a match is made that isn't what is wanted by the user and
# incorporates an aromatic where it should not be. This prevents aryl
# aromatic rings that have 2 or more atoms in a match to be used as a
# match. -TR
new_new_match_struct_1 = []
for match in new_match_struct_1:
use_match = True
for ring in struct_1.ring:
atoms_in_ring = ring.getAtomList()
matched_atoms_in_ring = 0
for atom in match:
if atom in atoms_in_ring:
matched_atoms_in_ring += 1
if matched_atoms_in_ring >= 2:
if ring.isAromatic():
if not struct_1.property.get('b_cs_accept_aromatic', False):
use_match = False
if use_match:
new_new_match_struct_1.append(match)
new_new_match_struct_2 = []
for match in new_match_struct_2:
use_match = True
for ring in struct_2.ring:
atoms_in_ring = ring.getAtomList()
matched_atoms_in_ring = 0
for atom in match:
if atom in atoms_in_ring:
matched_atoms_in_ring += 1
if matched_atoms_in_ring >= 2:
if ring.isAromatic():
if not struct_2.property.get('b_cs_accept_aromatic', False):
use_match = False
if use_match:
new_new_match_struct_2.append(match)
print('>>> new_new_match_struct_1: {}'.format(new_new_match_struct_1))
print('>>> new_new_match_struct_2: {}'.format(new_new_match_struct_2))
# 2.) Eliminate zeroes indices from within lists.
# Actually, maybe it's best to do this upon application in a case by case
# basis.
# This only worked if the lists were the same length.
# print('>>> match_struct_1: {}'.format(match_struct_1))
# print('>>> match_struct_2: {}'.format(match_struct_2))
# new_match_struct_1 = []
# new_match_struct_2 = []
# for sub_match_struct_1, sub_match_struct_2 in itertools.izip(
# match_struct_1, match_struct_2):
# sub_match_struct_1, sub_match_struct_2 = \
# remove_index_from_both_if_equals_zero(
# sub_match_struct_1, sub_match_struct_2)
# new_match_struct_1.append(sub_match_struct_1)
# new_match_struct_2.append(sub_match_struct_2)
# print('>>> new_match_struct_1: {}'.format(new_match_struct_1))
# print('>>> new_match_struct_2: {}'.format(new_match_struct_2))
return new_new_match_struct_1, new_new_match_struct_2
def get_atom_numbers_from_structure_with_pattern(structure,
pattern,
first_match_only=False,
use_substructure=False):
"""
Gets the atom indices inside a structure that match a pattern.
Takes care of two subtle intricacies.
1. Schrödinger has two methods to match atoms inside of a structure. The
argument `use_substructure` selects whether to use
`schrodinger.structutils.analyze.evaluate_substructure` or
`schrodinger.structutils.analyze.evaluate_smarts`. One or the other may
be more convenient for your system.
2. The pattern may match multiple times in a structure. The argument
`first_match_only` chooses whether to use all of the matches or just the
first one.
Arguments
---------
structure : Schrödinger structure object
pattern : string
first_match_only : bool
use_substructure : bool
Returns
-------
list of integers
"""
# print(">>> structure: {}".format(structure))
# print(">>> pattern: {}".format(pattern))
# print(">>> first_match_only: {}".format(first_match_only))
# print(">>> use_substructure: {}".format(use_substructure))
if use_substructure:
atom_numbers = analyze.evaluate_substructure(
structure,
pattern,
first_match_only=first_match_only)
# I'm not sure how the logic for Schrodinger's first match function
# works, but in the case for evaluate_substructure() it does not
# always returns a single match. The following logic should correct
# this without distrubing anything else. - Tony
# Wonder if we should move this check outside the if/else in case the
# same behavior happens with evaluate_smarts(). - Eric
if first_match_only and len(atom_numbers) > 1:
del atom_numbers[1:]
else:
atom_numbers = analyze.evaluate_smarts(
structure,
pattern,
unique_sets=first_match_only)
return atom_numbers
def remove_index_from_both_if_equals_zero(a, b):
"""
Scans over two lists. If any elements equal False/None, then remove that
index from both lists.
Arguments
---------
a : list (of integers usually)
b : list (of integers usually)
Returns
-------
a : list
b : list
"""
assert len(a) == len(b), "Lists must be of same length!"
# This prevents the code from breaking if the user supplies a and b as the
# same object. It would probably be even better to check if they're the
# exact same object than create new objects.
a = copy.deepcopy(a)
b = copy.deepcopy(b)
for i in range(len(a) - 1, -1, -1):
if not a[i] or not b[i]:
a.pop(i)
b.pop(i)
return a, b
def merge_structures_from_matching_atoms(struct_1, match_1, struct_2, match_2):
"""
Combines two structures.
Structures should already be superimposed.
THIS IS THE HEART OF THIS PYTHON MODULE. DOES SO MUCH STUFF. SUCH WOW.
If you're smart, it might be nice to encapsulate this a bit more. It may do
too much right now, making it cumbersome to manipulate.
Arguments
---------
struct_1 : Schrödinger structure
match_1 : list of integers
Atom indices for the superimposed atoms in struct_1
struct_2 : Schrödinger structure
match_2 : list of integers
Atom indices for the superimposed atoms in struct_2
Returns
-------
Schrödinger structure
"""
merge = struct_1.merge(struct_2, copy_props=True)
# Number of atoms in original structure.
num_atoms = len(struct_1.atom)
common_atoms_2 = [x + num_atoms for x in match_2]
common_atoms_2 = [merge.atom[x] for x in common_atoms_2]
common_atoms_1 = [merge.atom[x] for x in match_1]
print(' * AFTER MERGE:')
print(' * {:<30} {} {}'.format(
struct_2.title,
[x.index for x in common_atoms_2],
[x.atom_type_name for x in common_atoms_2]))
print('ATOMS IN ORIGINAL STRUCTURE: {:>5}'.format(num_atoms))
print('ATOMS IN MERGED STRUCTURE: {:>5}'.format(len(merge.atom)))
print('ATOMS IN NEW STRUCTURE: {:>5}'.format(
len(merge.atom) - len(common_atoms_1)))
print('-' * 80)
# Look at all the common atoms in struct_2.
for i, (common_atom_1, common_atom_2) in enumerate(
zip(common_atoms_1, common_atoms_2)):
print('CHECKING COMMON ATOM {}:'.format(i + 1))
print(' * ORIGINAL ATOM: {:>4}/{}'.format(
common_atom_1.index,
common_atom_1.atom_type_name))
for original_bond in common_atom_1.bond:
print(' * BOND: {:>4}/{} {:>4}/{}'.format(
original_bond.atom1.index, original_bond.atom1.atom_type_name,
original_bond.atom2.index, original_bond.atom2.atom_type_name))
print(' * MATCHING ATOM: {:>4}/{}'.format(
common_atom_2.index,
common_atom_2.atom_type_name))
# If a user is templating struct2 onto struct1, but struct1 has a wild
# card indicated for one of the matching atoms then we need to replace
# the atom type with that of struct2. This allows more variablity and
# flexibility to merge. I still forsee many problems. -TR
if common_atom_1.atom_type == 64:
common_atom_2.atom_type = common_atom_1.atom_type
common_atom_1.color = common_atom_2.color
#common_atom_1.atom_type = common_atom_2.atom_type
common_atom_1.color = common_atom_2.color
#For somereason the rest of the properties of the atom class aren't
#updated when the atom_type is changed. Here we set the atom type to
#what the atom type is, which is redundant, but it resets the atomic
#number and weight. The AtomName isn't important, but it makes it look
#pretty in the final mae file.
common_atom_1.atom_name = (str(common_atom_1.element) + str(common_atom_1.index))
#common_atom_1._setAtomType(common_atom_1.atom_type)
#common_atom_1._setAtomName(str(common_atom_1.element) + str(common_atom_1.index))
# Below are alternatives options for which atoms to keep. Currently, the
# coordinates of the atoms from struct_1 are kept.
# Okay, let's try copying the XYZ from common_atom_2 into common_atom_1.
# This fixes some problems, but causes some other problems.
# common_atom_1.x = common_atom_2.x
# common_atom_1.y = common_atom_2.y
# common_atom_1.z = common_atom_2.z
# This keeps the average coordinates.
# common_atom_1.x = (common_atom_1.x + common_atom_2.x) / 2
# common_atom_1.y = (common_atom_1.y + common_atom_2.y) / 2
# common_atom_1.z = (common_atom_1.z + common_atom_2.z) / 2
for merge_bond in common_atom_2.bond:
print(' * BOND: {:>4}/{} {:>4}/{}'.format(
merge_bond.atom1.index, merge_bond.atom1.atom_type_name,
merge_bond.atom2.index, merge_bond.atom2.atom_type_name))
# This is the atom in struct_1 that matches the 1st atom of the bond
# in struct_2.
# Actually, is this necessary? Isn't this just common_atom_1?
atom1 = common_atoms_1[
common_atoms_2.index(merge_bond.atom1)]
# These bonds already exist in the original structure.
# UPDATE: Just kidding! Jess found a case where the bond doesn't
# exist in the original structure. See q2mm/q2mm issue #40.
# We want to copy any new properties from the bonds in the merged
# structure into the original bonds.
if merge_bond.atom2 in common_atoms_2:
atom2 = common_atoms_1[
common_atoms_2.index(merge_bond.atom2)]
# Bond that we want to copy properties to.
# Surprise! This can return None. You'd think that should raise
# an exception.
bond = merge.getBond(atom1, atom2)
if bond is None:
atom1.addBond(atom2.index, merge_bond.order)
print(' * ADDED: {:>4}/{} {:>4}/{}'.format(
atom1.index, atom1.atom_type_name,
atom2.index, atom2.atom_type_name))
else:
print(' * UPDATED: {:>4}/{} {:>4}/{}'.format(
atom1.index, atom1.atom_type_name,
atom2.index, atom2.atom_type_name))
# If the bond doesn't exist in struct_1, we want to make a new one.
else:
atom2 = merge_bond.atom2
atom1.addBond(atom2.index, merge_bond.order)
print(' * ADDED: {:>4}/{} {:>4}/{}'.format(
atom1.index, atom1.atom_type_name,
atom2.index, atom2.atom_type_name))
bond = merge.getBond(atom1, atom2)
for k, v in merge_bond.property.items():
# Here, bond is the duplicate bond in struct_1 or the new bond.
if k not in bond.property or not bond.property[k]:
# For i_cs_rca4_1, i_cs_rca4_2, i_cs_torc_a1, etc.
# these values may not be correct. That's okay though! They
# get fixed soon by the function add_bond_prop. What's
# important is that values like i_cs_torc_a5 get copied.
# (because these values don't get changed by atom number).
bond.property.update({k: v})
#if common_atom_1.atom_type != common_atom_2.atom_type:
#common_atom_1.atom_type = common_atom_2.atom_type
#common_atom_1.color = common_atom_2.color
print('-' * 80)
fix_torsions = get_torc(struct_1, struct_2, match_1, match_2)
print('TORSION RESTRAINTS: {}'.format(fix_torsions))
# Delete duplicate atoms once you copied all the data.
merge.deleteAtoms(common_atoms_2)
# This code is so dumb.
merge = add_bond_prop(merge,
struct_1, match_1,
struct_2, match_2,
name='rca4')
merge = add_bond_prop(merge,
struct_1, match_1,
struct_2, match_2,
name='torca')
merge = add_bond_prop(merge,
struct_1, match_1,
struct_2, match_2,
name='torcb')
merge.property['s_m_title'] += '_' + struct_2.property['s_m_title']
if hasattr(struct_2, 's_m_entry_name'):
merge.property['s_m_entry_name'] += \
'_' + struct_2.property['s_m_entry_name']
# I have encoutered problems in macromodel for metal centers with
# multiple bond (e.g. M-Cp or M-Ph). In order for macromodel to read
# these structures, the metal and dummy atoms have to be at the end
# of the atom listing (this is a really silly bug). This code rearranges
# the atom in this correct order. Interest refers to the atoms of
# interest to reorder.
reordered_atoms = []
original_wo_interest = []
interest = []
for atom in merge.atom:
if atom.atom_type_name in ATOMS_TO_MOVE:
interest.append(atom.index)
else:
original_wo_interest.append(atom.index)
reordered_atoms.extend(original_wo_interest)
reordered_atoms.extend(interest)
merge = build.reorder_atoms(merge,reordered_atoms)
# print(reordered_atoms)
for bond in merge.bond:
for prop in bond.property:
initial_index = bond.property[prop]
if 'i_cs' in prop and initial_index:
for i,old_atom_index in enumerate(reordered_atoms):
if old_atom_index == initial_index:
bond.property[prop] = i+1
frozen_atoms = []
for frozen_atom_index in range(1, num_atoms+1):
initial_index = frozen_atom_index
for i,old_atom_index in enumerate(reordered_atoms):
if old_atom_index == initial_index:
frozen_atoms.append(i+1)
# Minimize the structure.
# Freeze atoms in struct_1.
# Also enforce the TORC commands.
merge = mini(
[merge],
frozen_atoms=range(1, num_atoms + 1),
#frozen_atoms=frozen_atoms,
fix_torsions=fix_torsions)[0]
# Short conformational sampling.
merge = mcmm(
[merge],
frozen_atoms=range(1, num_atoms + 1))[0]
#frozen_atoms=frozen_atoms)[0]
# # Do another minimization, this time without frozen atoms.
#if fix_torsions:
# merge = mini(
# [merge],
# fix_torsions=fix_torsions)[0]
merge = mini(
[merge],
fix_torsions=fix_torsions)[0]
merge = mcmm([merge])[0]
return merge
def add_chirality(structure):
"""
Uses Schrödinger's utilities to assign chirality to atoms, and then adds
that information to the title and entry name of structures.
"""
chirality_dic = analyze.get_chiral_atoms(structure)
string = '_'
for key, value in chirality_dic.items():
string += '{}{}'.format(key, value.lower())
structure.property['s_m_title'] += string
structure.property['s_m_entry_name'] += string
return structure
def search_dic_keys(dic, lookup):
"""
Takes a string, looks up all the dictionary keys that contain that string
and returns the corresponding value.
Arguments
---------
dic : dictionary
lookup : string
"""
for key, value in dic.items():
if lookup in key:
yield value
def get_torc(struct_1, struct_2, match_1, match_2):
"""
Generates FXTA commands for MacroModel from TORC commands.
Very repetitive code. Whole thing should get an update when a better atom to
atom conversion is used.
"""
num_atoms = len(struct_1.atom)
new_match_2 = [x + num_atoms for x in match_2]
fix_torsions = []
for bond in struct_2.bond:
if bond.property['i_cs_torc_a1']:
atoms = [
bond.property['i_cs_torc_a1'],
bond.atom1.index,
bond.atom2.index,
bond.property['i_cs_torc_a4']
]
torsion = struct_2.measure(
atoms[0], atoms[1], atoms[2], atoms[3])
# Update atom numbers.
new_atoms = []
for atom in atoms:
if atom in match_2:
new_atoms.append(match_1[match_2.index(atom)])
else:
new_index = atom + num_atoms
atoms_in_str_2_before_this_one = \
sum(i < new_index for i in new_match_2)
new_index -= atoms_in_str_2_before_this_one
new_atoms.append(new_index)
fix_torsions.append((
new_atoms[0],
new_atoms[1],
new_atoms[2],
new_atoms[3],
torsion))
# Both can happen.
if bond.property['i_cs_torc_b1']:
atoms = [
bond.property['i_cs_torc_b1'],
bond.atom1.index,
bond.atom2.index,
bond.property['i_cs_torc_b4']
]
torsion = struct_2.measure(
atoms[0], atoms[1], atoms[2], atoms[3])
# Update atom numbers.
new_atoms = []
for atom in atoms:
if atom in match_2:
new_atoms.append(match_1[match_2.index(atom)])
else:
new_index = atom + num_atoms
atoms_in_str_2_before_this_one = \
sum(i < new_index for i in new_match_2)
new_index -= atoms_in_str_2_before_this_one
new_atoms.append(new_index)
fix_torsions.append((
new_atoms[0],
new_atoms[1],
new_atoms[2],
new_atoms[3],
torsion))
return fix_torsions
def add_bond_prop(merge, struct_1, match_1, struct_2, match_2, name=None):
"""
Takes the RCA4 and TORC properties from two structures and properly combines
them into the merged structures.
RCA4 and TORC properties are stored in Schrödinger bond properties:
* i_cs_rca4_1
* i_cs_rca4_2
* i_cs_torc_a1
* i_cs_torc_a4
* r_cs_torc_a5
* r_cs_torc_a6
* i_cs_torc_b1
* i_cs_torc_b4
* r_cs_torc_b5
* r_cs_torc_b6
Arguments
---------
merge : Schrödinger structure object
This is the result of merging struct_1 and struct_2 using match_1
and match_2 as patterns.
struct_1 : Schrödinger structure object
match_1 : string
struct_2 : Schrödinger structure object
match_2 : string
name : string
"rca4", "torca", or "torcb"
Returns
-------
merge : Updated bonds with new RCA4 properties
"""
if name == 'rca4':
string = 'RCA4'
str1 = 'i_cs_rca4_1'
str2 = 'i_cs_rca4_2'
elif name == 'torca':
string = 'TORC'
str1 = 'i_cs_torc_a1'
str2 = 'i_cs_torc_a4'
elif name == 'torcb':
string = 'TORC'
str1 = 'i_cs_torc_b1'
str2 = 'i_cs_torc_b4'
lists_of_atoms = []
for bond in struct_2.bond:
try:
bond.property[str1]
bond.property[str2]
except KeyError as e:
print('ERROR! NO {}: {}'.format(
string, struct_2.property['s_m_title']))
raise e
if bond.property[str1]:
atoms = [bond.property[str1],
bond.atom1.index,
bond.atom2.index,
bond.property[str2]]
lists_of_atoms.append(atoms)
print('{}: {}'.format(string, lists_of_atoms))
# Now update the RCA4 and TORC atom indices to match the structure post
# merging and deletion.
# Need this for later.
num_atoms = len(struct_1.atom)
# These are the new atom numbers for the common atoms in struct_2.
new_match_2 = [x + num_atoms for x in match_2]
# Contains new RCA4 commands.
new_lists_of_atoms = []
for atoms in lists_of_atoms:
new_atoms = []
for x in atoms:
if x in match_2:
# If the RCA4/TORC atom is one of the matching/duplicate/common
# atoms, it's going to get deleted. This replaces the index of
# that atom with the matching atom in the 1st structure.
new_atoms.append(match_1[match_2.index(x)])
else:
# The atoms in 2nd structure will always be added after the
# atoms in the 1st structure. This adjusts the atom indices
# appropriately.
new_index = x + num_atoms
# If matching/duplicate/common atoms occur in the list before
# this one, those atoms are going to get deleted. We need to
# account for them disappearing.
atoms_in_str_2_before_this_one = \
sum(i < new_index for i in new_match_2)
new_index -= atoms_in_str_2_before_this_one
new_atoms.append(new_index)
new_lists_of_atoms.append(new_atoms)
print('{} NEW: {}'.format(string, new_lists_of_atoms))
print(' * UPDATING {}:'.format(string))
# Now have to update the bonds RCA4 properties.
for atoms in new_lists_of_atoms:
bond = merge.getBond(atoms[1], atoms[2])
print(' * BOND: {:>4} {:>4}/{:2} {:>4}/{:2} {:>4}'.format(
bond.property[str1],
bond.atom1.index,
bond.atom1.atom_type_name,
bond.atom2.index,
bond.atom2.atom_type_name,
bond.property[str2]))
bond.property[str1] = atoms[0]
bond.property[str2] = atoms[3]
print(' * UPDATE: '
'{:>4}/{:2} {:>4}/{:2} {:>4}/{:2} {:>4}/{:2}'.format(
merge.atom[bond.property[str1]].index,
merge.atom[bond.property[str1]].atom_type_name,
bond.atom1.index,
bond.atom1.atom_type_name,
bond.atom2.index,
bond.atom2.atom_type_name,
merge.atom[bond.property[str2]].index,
merge.atom[bond.property[str2]].atom_type_name))
return merge
def mini(structures, frozen_atoms=None, fix_torsions=None):
"""
Takes many structures, minimizes them and returns the minimized structures.
It's faster to do multiple structures at once.
Arguments
---------
structure : list of Schrödinger structure
Returns
-------
list of Schrödinger structure
"""
import schrodinger.application.macromodel.utils as mmodutils
import schrodinger.job.jobcontrol as jobcontrol
from setup_com_from_mae import MyComUtil
print(' - ATTEMPTING MINI')
sch_writer = sch_struct.StructureWriter('TEMP.mae')
sch_writer.extend(structures)
sch_writer.close()
# Setup the minimization.
com_setup = MyComUtil()
com_setup.my_mini(
mae_file='TEMP.mae',
com_file='TEMP.com',
out_file='TEMP_OUT.mae',
frozen_atoms=frozen_atoms,
fix_torsions=fix_torsions)
command = ['bmin', '-WAIT', 'TEMP']
# Run the minimization.
job = jobcontrol.launch_job(command)
job.wait()
# Read the minimized structures.
sch_reader = sch_struct.StructureReader('TEMP_OUT.mae')
new_structures = []
for structure in sch_reader:
new_structures.append(structure)
sch_reader.close()
if len(new_structures) > 0:
print(' - MINI SUCCEEDED')
structures = [new_structures[0]]
else:
print(' - MINI FAILED. CONTINUING W/O MINI')
if DEBUG:
raw_input('Press any button to continue.')
# Remove temporary files.
os.remove('TEMP.mae')
os.remove('TEMP.com')
os.remove('TEMP_OUT.mae')
os.remove('TEMP.log')
return structures
def mcmm(structures, frozen_atoms=None):
"""
Takes many structures, and does a short MCMM search on them.
Arguments
---------
structure : list of Schrödinger structure
Returns
-------
list of Schrödinger structure
"""