In [1]:
import time
from rdkit import Chem
from rdkit.Chem import AllChem, TorsionFingerprints
import json
import tqdm
import numpy as np

from utils import *



_ColormakerRegistry()

In [None]:
def _lowest_energy_copy(mol, energies):
    """Return a copy of ``mol`` that carries only its lowest-energy conformer.

    The position of the minimum in ``energies`` is used directly as the
    conformer id, which is the convention the rest of this notebook uses.
    NOTE(review): this assumes ``get_conformer_energies`` returns one energy
    per conformer, ordered by conformer id -- confirm against ``utils``.
    """
    best = Chem.Mol(mol)
    best.RemoveAllConformers()
    # int(...) because RDKit's Python bindings expect a plain Python int.
    best.AddConformer(mol.GetConformer(int(np.argmin(energies))), assignId=True)
    return best


def create_t_alkane(i, return_mol=False):
    """Build a T-shaped alkane and summarise the energies of its conformers.

    The molecule is ``CCCC(<i carbons>)CCCC`` with explicit hydrogens. Up to
    200 conformers are embedded, MMFF-optimised, and scored with
    ``ConformerGeneratorCustom.get_conformer_energies``. The number of
    non-ring torsions is printed as a side effect.

    Args:
        i: Number of carbon atoms in the side chain attached to the centre.
        return_mol: When true, also return a copy of the molecule holding only
            its lowest-energy conformer. Defaults to ``False`` so existing
            callers keep receiving just the dictionary.

    Returns:
        A dict with keys ``'mol'`` (non-isomeric SMILES of the H-added
        molecule), ``'standard'`` (minimum conformer energy) and ``'total'``
        (sum over conformers of ``exp(-(E - standard))``). If ``return_mol``
        is true, a ``(dict, mol)`` tuple is returned instead.
    """
    cin = 'C' * i
    m = Chem.MolFromSmiles(f'CCCC({cin})CCCC')
    m = Chem.rdmolops.AddHs(m)

    AllChem.EmbedMultipleConfs(m, numConfs=200, numThreads=-1)
    Chem.AllChem.MMFFOptimizeMoleculeConfs(m, numThreads=-1)

    confgen = ConformerGeneratorCustom(max_conformers=1,
                                       force_field='mmff',
                                       pool_multiplier=1)

    energys = confgen.get_conformer_energies(m)
    print(len(TorsionFingerprints.CalculateTorsionLists(m)[0]))
    standard = energys.min()
    total = np.sum(np.exp(-(energys - standard)))

    out = {
        'mol': Chem.MolToSmiles(m, isomericSmiles=False),
        'standard': standard,
        'total': total
    }

    if return_mol:
        return out, _lowest_energy_copy(m, energys)
    return out


# Label T-alkanes with side chains of 4..13 carbons. Each molecule's own
# lowest-energy conformer is written to '<i>.mol'; previously this loop wrote
# `new`, a variable that only exists if a later cell has already been run.
for i in range(4, 14):
    out, lowest = create_t_alkane(i, return_mol=True)
    with open(f'transfer_test_t_chain/{i}.json', 'w') as fp:
        json.dump(out, fp)
    Chem.MolToMolFile(lowest, f'{i}.mol')

        

In [None]:
# Step-by-step version of the T-alkane labelling for a single side-chain
# length, followed by a 3D view of the lowest-energy conformer.
import py3Dmol

i = 10
cin = 'C' * i
m = Chem.AddHs(Chem.MolFromSmiles(f'CCCC({cin})CCCC'))

AllChem.EmbedMultipleConfs(m, numConfs=200, numThreads=-1)
AllChem.MMFFOptimizeMoleculeConfs(m, numThreads=-1)

confgen = ConformerGeneratorCustom(
    max_conformers=1,
    force_field='mmff',
    pool_multiplier=1,
)
energys = confgen.get_conformer_energies(m)
argsorted = np.argsort(energys)

# `new` is a copy of the molecule that keeps only the conformer whose index
# comes first in the energy ordering.
new = Chem.Mol(m)
new.RemoveAllConformers()
conf = m.GetConformer(int(argsorted[0]))
new.AddConformer(conf, assignId=True)

print(len(TorsionFingerprints.CalculateTorsionLists(m)[0]))
standard = energys.min()
total = np.sum(np.exp(-(energys - standard)))
out = dict(
    mol=Chem.MolToSmiles(m, isomericSmiles=False),
    standard=standard,
    total=total,
)

p = py3Dmol.view(width=800, height=800)
print(len(TorsionFingerprints.CalculateTorsionLists(m)[0]))
drawit(m, p, confId=int(argsorted[0]))


In [None]:
def create_linear_alkane(i):
    """Summarise the conformer energies of an unbranched alkane.

    Args:
        i: Number of carbon atoms in the chain (the SMILES is ``'C' * i``).

    Returns:
        A dict with keys ``'mol'`` (non-isomeric SMILES of the H-added
        molecule), ``'standard'`` (minimum conformer energy) and ``'total'``
        (sum over conformers of ``exp(-(E - standard))``).

    Side effects:
        Prints the number of non-ring torsions of the molecule.
    """
    mol = Chem.AddHs(Chem.MolFromSmiles('C' * i))

    # Embed up to 200 conformers, then relax them all with MMFF.
    AllChem.EmbedMultipleConfs(mol, numConfs=200, numThreads=-1)
    AllChem.MMFFOptimizeMoleculeConfs(mol, numThreads=-1)

    generator = ConformerGeneratorCustom(
        max_conformers=1,
        force_field='mmff',
        pool_multiplier=1,
    )
    energies = generator.get_conformer_energies(mol)

    torsion_lists = TorsionFingerprints.CalculateTorsionLists(mol)
    print(len(torsion_lists[0]))

    lowest = energies.min()
    weight_sum = np.sum(np.exp(-(energies - lowest)))

    return {
        'mol': Chem.MolToSmiles(mol, isomericSmiles=False),
        'standard': lowest,
        'total': weight_sum,
    }


# Write one JSON label file per straight-chain alkane with 10..14 carbons.
for i in range(10, 15):
    out = create_linear_alkane(i)
    target = f'transfer_test_straight_chain/{i}.json'
    with open(target, 'w') as fp:
        json.dump(out, fp)
        

In [None]:
def create_branched():
    """Grow a random branched alkane and summarise its conformer energies.

    Starting from butane, carbon atoms are attached one at a time to a
    randomly chosen existing atom until the heavy-atom count reaches a target
    drawn from 15..20. Atoms that already have more than two neighbours are
    never extended, so no atom ends up with more than three heavy neighbours.

    Returns:
        A dict with keys ``'mol'`` (non-isomeric SMILES of the H-added
        molecule), ``'standard'`` (minimum conformer energy) and ``'total'``
        (sum over conformers of ``exp(-(E - standard))``).

    Side effects:
        Draws from NumPy's global random state and prints the number of
        non-ring torsions of the molecule.
    """
    editable = Chem.RWMol(Chem.MolFromSmiles('CCCC'))

    target_size = np.random.choice([15,16,17,18,19,20])
    while editable.GetNumAtoms() < target_size:
        anchor_idx = np.random.randint(editable.GetNumAtoms())
        if editable.GetAtomWithIdx(anchor_idx).GetDegree() <= 2:
            carbon_idx = editable.AddAtom(Chem.rdchem.Atom(6))
            editable.AddBond(carbon_idx, anchor_idx, Chem.rdchem.BondType.SINGLE)

    Chem.SanitizeMol(editable)
    mol = Chem.AddHs(editable.GetMol())
    AllChem.EmbedMultipleConfs(mol, numConfs=200, numThreads=-1)
    AllChem.MMFFOptimizeMoleculeConfs(mol, numThreads=-1)

    generator = ConformerGeneratorCustom(
        max_conformers=1,
        rmsd_threshold=None,
        force_field='mmff',
        pool_multiplier=1,
    )
    energies = generator.get_conformer_energies(mol)

    torsion_lists = TorsionFingerprints.CalculateTorsionLists(mol)
    print(len(torsion_lists[0]))

    lowest = energies.min()
    weight_sum = np.sum(np.exp(-(energies - lowest)))

    return {
        'mol': Chem.MolToSmiles(mol, isomericSmiles=False),
        'standard': lowest,
        'total': weight_sum,
    }

In [None]:
# The notebook's first cell does `import tqdm`, which binds the *module*;
# calling it raises "TypeError: 'module' object is not callable" unless the
# star import from `utils` happens to rebind the name. Import the progress-bar
# class under a private alias so this cell works either way.
from tqdm import tqdm as _progress_bar

# Generate and label 500 random branched alkanes, one JSON file each.
for i in _progress_bar(range(500)):
    out = create_branched()
    with open(f'bigger_labeled/{i}.json', 'w') as fp:
        json.dump(out, fp)

In [None]:
# Single ad-hoc call; the returned label dict is shown as the cell output.
create_branched()

In [None]:
# Energy helper shared by this cell and by later cells that read `confgen`.
confgen = ConformerGeneratorCustom(
    max_conformers=1,
    rmsd_threshold=None,
    force_field='mmff',
    pool_multiplier=1,
)

# Candidate SMILES strings. Each "...prime" variant is the same string with
# `[O]` replaced by `O`. Only `eightprime` is used below; the rest are kept
# for reference.
one = 'COc1cc(C2OCC3C(c4ccc([O])c(OC)c4)OCC23)ccc1[O]'
two = 'COc1cc(C2Oc3c(OC)cc(C=CCO)cc3C2CO)ccc1[O]'
twoprime = 'COc1cc(C2Oc3c(OC)cc(C=CCO)cc3C2CO)ccc1O'
three = 'COc1cc(C2Oc3c(OC)cc(C(O)C(CO)Oc4ccc(C=CCO)cc4OC)cc3C2CO)ccc1[O]'
threeprime = 'COc1cc(C2Oc3c(OC)cc(C(O)C(CO)Oc4ccc(C=CCO)cc4OC)cc3C2CO)ccc1O'
# four = 'COc1cc(C(O)C(CO)Oc2ccc(C(O)C(CO)Oc3ccc(C(O)C(CO)Oc4ccc(C=CCO)cc4OC)cc3OC)cc2OC)ccc1[O]'
# ten = 'COc1cc(C2Oc3c(OC)cc(C4Oc5c(OC)cc(C(O)C(CO)Oc6ccc(C7OCC8C(c9cc(OC)c%10c(c9)C(CO)C(c9cc(OC)c%11c(c9)C(CO)C(c9ccc(O)c(OC)c9)O%11)O%10)OCC78)cc6OC)cc5C4CO)cc3C2CO)ccc1[O].COc1cc(C2Oc3c(OC)cc(C4Oc5c(OC)cc(C=CCO)cc5C4CO)cc3C2CO)ccc1O'
# nine = 'COc1cc(C(O)C(CO)Oc2ccc(C3OCC4C(c5cc(OC)c6c(c5)C(CO)C(c5ccc(OC(CO)C(O)c7cc(OC)c8c(c7)C(CO)C(c7ccc([O])c(OC)c7)O8)c(OC)c5)O6)OCC34)cc2OC)ccc1[O].COc1cc(C2Oc3c(OC)cc(C4Oc5c(OC)cc(C=CCO)cc5C4CO)cc3C2CO)ccc1O'
eight = 'COc1cc(C(O)C(CO)Oc2c(OC)cc(C(O)C(CO)Oc3ccc(C4Oc5c(OC)cc(C6Oc7c(OC)cc(C8Oc9c(OC)cc(C=CCO)cc9C8CO)cc7C6CO)cc5C4CO)cc3OC)cc2-c2cc(C3OCC4C(c5ccc(O)c(OC)c5)OCC34)cc(OC)c2[O])ccc1[O]'
eightprime = 'COc1cc(C(O)C(CO)Oc2c(OC)cc(C(O)C(CO)Oc3ccc(C4Oc5c(OC)cc(C6Oc7c(OC)cc(C8Oc9c(OC)cc(C=CCO)cc9C8CO)cc7C6CO)cc5C4CO)cc3OC)cc2-c2cc(C3OCC4C(c5ccc(O)c(OC)c5)OCC34)cc(OC)c2O)ccc1O'

# Embed 10 conformers of the chosen molecule, relax them with MMFF, and show
# the lowest energy as the cell output.
x = Chem.AddHs(Chem.MolFromSmiles(eightprime))
res = AllChem.EmbedMultipleConfs(x, numConfs=10, numThreads=-1)
AllChem.MMFFOptimizeMoleculeConfs(x, numThreads=-1)
confgen.get_conformer_energies(x).min()

In [None]:
import py3Dmol

# Report the number of non-ring torsions of `x`, then render its first
# conformer in an 800x800 viewer.
n_torsions = len(TorsionFingerprints.CalculateTorsionLists(x)[0])
print(n_torsions)

p = py3Dmol.view(width=800, height=800)
drawit(x, p, confId=0)

In [None]:
# Torsion angles of every conformer currently stored on `mol`.
# NOTE(review): `mol` is assigned by cells further down the notebook, so this
# cell only works after one of them has been run.
nonring, ring = TorsionFingerprints.CalculateTorsionLists(mol)
# Iterate over the conformers that actually exist instead of assuming exactly
# 200 of them: embedding can yield fewer (an unknown id raises) or more.
all_tors = [
    TorsionFingerprints.CalculateTorsionAngles(mol, nonring, ring, confId=conf.GetId())
    for conf in mol.GetConformers()
]

In [9]:
# mol = Chem.MolFromSmiles('CC(CCC)CCCC(CCCC)CC')
# standard = 7.678142433712216
import json
import py3Dmol

# Read a stored label file and echo its contents.
with open('huge_hc_set/10_6461.json') as fp:
    o = json.load(fp)
print(o)

# Rebuild the molecule from the stored SMILES, then embed and MMFF-relax 200
# conformers. `res` ends up holding the optimiser's return value.
standard = o['standard']
mol = Chem.AddHs(Chem.MolFromSmiles(o['mol']))
res = AllChem.EmbedMultipleConfs(mol, numConfs=200, numThreads=-1)
res = AllChem.MMFFOptimizeMoleculeConfs(mol, numThreads=-1)

# Show the first conformer.
p = py3Dmol.view(width=800, height=800)
drawit(mol, p, confId=0)

{'mol': '[H]C([H])([H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])C([H])(C([H])([H])C([H])([H])[H])C([H])([H])C([H])([H])[H]', 'standard': 1.8118855552367064, 'total': 20.795779524447195}


In [4]:
# mol = Chem.MolFromSmiles('CC(CCC)CCCC(CCCC)CC')
# standard = 7.678142433712216
import json
import py3Dmol

# Read a stored label file and echo its contents.
with open('diff/differentcarbon.json') as fp:
    o = json.load(fp)
print(o)

# Rebuild the molecule from the stored SMILES, then embed and MMFF-relax 200
# conformers. `res` ends up holding the optimiser's return value.
standard = o['standard']
mol = Chem.AddHs(Chem.MolFromSmiles(o['mol']))
res = AllChem.EmbedMultipleConfs(mol, numConfs=200, numThreads=-1)
res = AllChem.MMFFOptimizeMoleculeConfs(mol, numThreads=-1)

# Show the first conformer.
p = py3Dmol.view(width=800, height=800)
drawit(mol, p, confId=0)

{'standard': 7.668625034772399, 'total': 13.263723987526067, 'mol': 'CC(CCC)CCCC(CCCC)CC'}


In [None]:
# Display the energies of all conformers of the current `mol`.
# Relies on `confgen` and `mol` left behind by earlier cells.
confgen.get_conformer_energies(mol)

In [None]:
# Time the full pairwise TFD matrix for `mol`, converted by
# `array_to_lower_triangle` (not defined in this notebook; presumably provided
# by the star import from `utils`).
%timeit tfd = array_to_lower_triangle(Chem.TorsionFingerprints.GetTFDMatrix(mol, useWeights=False), True)

In [None]:
%timeit tfd = Chem.TorsionFingerprints.GetTFDBetweenConformers(mol, range(0,mol.GetNumConformers() - 1), [-1], useWeights=False)


In [None]:
# Time one energy evaluation over all conformers of the current `mol`.
%timeit confgen.get_conformer_energies(mol)

In [None]:
# List the conformer energies of `mol` in ascending order, one "rank energy"
# pair per line. `out` is left holding the sorted list.
out = sorted(confgen.get_conformer_energies(mol))
for idx, i in enumerate(out):
    print(idx, i)

In [None]:
import py3Dmol

# Render the first conformer of `mol` with explicit hydrogens stripped.
heavy_only = Chem.RemoveHs(mol)
p = py3Dmol.view(width=800, height=800)
drawit(heavy_only, p, confId=0)

In [None]:
import pickle
import py3Dmol

# NOTE: unpickling executes arbitrary code from the file, so only load
# 'test_mol.pickle' if it comes from a trusted source.
with open('test_mol.pickle', 'rb') as fp:
    mol = pickle.load(fp)

# Show the first conformer of the loaded molecule.
p = py3Dmol.view(width=800, height=800)
drawit(mol, p, confId=0)

In [None]:
# `plt` is otherwise first imported in a later cell, so import it here to keep
# this cell runnable on a fresh kernel.
import matplotlib.pyplot as plt

mol = Chem.MolFromMolFile('258-trihexyl-decane.mol')
standard = 14.88278294332602
mol = Chem.AddHs(mol)
# Up to 650 embeddings, discarding conformers within 1.0 RMS of a kept one.
res = AllChem.EmbedMultipleConfs(mol, numConfs=650, numThreads=-1, pruneRmsThresh=1.0)
res = AllChem.MMFFOptimizeMoleculeConfs(mol, numThreads=-1)


energys = confgen.get_conformer_energies(mol)
plt.title('RDKit set')
# Running sum of exp(-(E - standard)) over the conformers in stored order.
# NOTE(review): the 0.9 prefactor and the 1.2363186365185044 divisor are
# unexplained constants carried over unchanged -- confirm their meaning.
plt.plot(np.cumsum(0.9 * np.exp(-(energys - standard))) / 1.2363186365185044)

In [None]:
# `plt` is otherwise first imported in a later cell, so import it here to keep
# this cell runnable on a fresh kernel.
import matplotlib.pyplot as plt

# NOTE(review): `menergys` is not defined anywhere in this notebook; it must
# be supplied (presumably the energies from the RL run) before this cell can
# execute. `energys` comes from the preceding RDKit embedding cell.
# NOTE(review): the 1.4 / 0.4 prefactors and the reference constants are
# carried over unchanged -- confirm their meaning.
plt.plot(np.cumsum(1.4 * np.exp(-(menergys - 14.88278294332602))) / 1.2363186365185044, label='RL')
plt.plot(np.cumsum(0.4 * np.exp(-(energys - 14.88278294332602))) / 1.2363186365185044, label='ETKDG')
plt.legend()
plt.title('Gibbs vs Num_Samples')
plt.xlabel('Timestep / Number of Samples')
plt.ylabel('Normalized Gibbs')

In [None]:
# Pie chart of exp(-(E - E_max)) for each conformer, sorted ascending, so the
# lowest-energy conformers get the largest slices.
# NOTE(review): `energys` is whatever the most recently executed cell left
# behind; the 'ML model set' title suggests it should come from a different
# source than the RDKit embedding cells -- confirm before trusting the figure.
import matplotlib.pyplot as plt
%matplotlib inline
plt.title('ML model set')
plt.pie(np.sort(np.exp(-(energys - energys.max()))))

In [None]:
# Re-embed the reference molecule with 200 unpruned conformers and draw the
# same kind of pie chart for the RDKit-generated set.
standard = 14.88278294332602
mol = Chem.AddHs(Chem.MolFromMolFile('258-trihexyl-decane.mol'))
res = AllChem.EmbedMultipleConfs(mol, numConfs=200, numThreads=-1)
res = AllChem.MMFFOptimizeMoleculeConfs(mol, numThreads=-1)

energys = confgen.get_conformer_energies(mol)

# Each slice is exp(-(E - E_max)), sorted ascending.
plt.title('RDKit set')
plt.pie(np.sort(np.exp(-(energys - energys.max()))))