-
Notifications
You must be signed in to change notification settings - Fork 2
/
rdconf.py
executable file
·120 lines (95 loc) · 4.12 KB
/
rdconf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/python3
import sys,string,argparse
from rdkit.Chem import AllChem as Chem
from optparse import OptionParser
import os, gzip
'''Given a smiles file, generate 3D conformers in output sdf.
Energy minimizes and filters conformers to meet energy window and rms constraints.
Some time ago I compared this to alternative conformer generators and
it was quite competitive (especially after RDKit's UFF implementation
added OOP terms).
'''
#convert smiles to sdf
def getRMS(mol, c1,c2):
rms = Chem.GetBestRMS(mol,mol,c1,c2)
return rms
parser = OptionParser(usage="Usage: %prog [options] <input>.smi <output>.sdf")
parser.add_option("--maxconfs", dest="maxconfs",action="store",
help="maximum number of conformers to generate per a molecule (default 20)", default="20", type="int", metavar="CNT")
parser.add_option("--sample_multiplier", dest="sample",action="store",
help="sample N*maxconfs conformers and choose the maxconformers with lowest energy (default 1)", default="1", type="float", metavar="N")
parser.add_option("--seed", dest="seed",action="store",
help="random seed (default 9162006)", default="9162006", type="int", metavar="s")
parser.add_option("--energy_window", dest="energy",action="store",
help="filter based on energy difference with lowest energy conformer", default="10", type="float", metavar="E")
parser.add_option("-v","--verbose", dest="verbose",action="store_true",default=False,
help="verbose output")
parser.add_option("--nomin", dest="nomin",action="store_true",default=False,
help="don't perform energy minimization (bad idea)")
(options, args) = parser.parse_args()
if(len(args) < 2):
parser.error("Need input and output")
sys.exit(-1)
input = args[0]
output = args[1]
smifile = open(input)
split = os.path.splitext(output)
if split[1] == '.gz':
outf=gzip.open(output,'wt+')
output = split[0] #strip .gz
else:
outf = open(output,'w+')
if os.path.splitext(output)[1] == '.pdb':
sdwriter = Chem.PDBWriter(outf)
else:
sdwriter = Chem.SDWriter(outf)
if sdwriter is None:
print("Could not open ".output)
sys.exit(-1)
for line in smifile:
toks = line.split()
smi = toks[0]
name = ' '.join(toks[1:])
pieces = smi.split('.')
if len(pieces) > 1:
smi = max(pieces, key=len) #take largest component by length
print("Taking largest component: %s\t%s" % (smi,name))
mol = Chem.MolFromSmiles(smi)
if mol is not None:
if options.verbose:
print(smi)
try:
Chem.SanitizeMol(mol)
mol = Chem.AddHs(mol)
mol.SetProp("_Name",name);
maxconfs = int(options.maxconfs)
params = Chem.ETKDGv3()
params.ignoreSmoothingFail = True
for i in range(maxconfs):
params.randomSeed = options.seed+i
cids = Chem.EmbedMultipleConfs(mol, maxconfs, params)
maxconfs -= len(cids)
if options.verbose:
print(len(cids),"conformers found")
cenergy = []
for conf in cids:
#not passing confID only minimizes the first conformer
if options.nomin:
cenergy.append(conf)
else:
converged = not Chem.UFFOptimizeMolecule(mol,confId=conf)
cenergy.append(Chem.UFFGetMoleculeForceField(mol,confId=conf).CalcEnergy())
if options.verbose:
print("Convergence of conformer",conf,converged,cenergy[-1])
for conf in cids:
sdwriter.write(mol,conf)
if maxconfs == 0:
break
except (KeyboardInterrupt, SystemExit):
raise
except Exception as e:
print("Exception",e)
else:
print("ERROR:",smi)
sdwriter.close()
outf.close()