# Exploring conformational space of selected macrocycles - "M7" 

In this notebook we present and analyze selected structures; technical notes are [here](https://www.gitlab.com/user/gosia/icho).

In [66]:
from IPython.display import HTML

# Emit a small script plus a button that toggles the visibility of the notebook's
# code-input cells (elements matching 'div.input'). The script uses jQuery ('$'),
# and because code_show starts as true the inputs are hidden once the document is ready.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [67]:
import glob
import py3Dmol

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem import rdMolAlign
from rdkit.Chem.Draw import IPythonConsole
from rdkit import rdBase
# Print the RDKit version used for this run.
print(rdBase.rdkitVersion)
import os,time
# Print the current local time as a timestamp for this run.
print( time.asctime())

2016.09.4
Fri Apr  7 13:06:30 2017


In [68]:
# Functions used in this notebook:

def grep_energies_from_sdf_outputs(files):
    """Collect one energy value per SDF file.

    For every path in ``files`` the file is read line by line; whenever a line
    contains the ``M  END`` marker, the line that follows it is converted to
    ``float`` and stored under the file's base name without extension. If a
    file contains several markers, the value after the last one wins.

    Parameters
    ----------
    files : iterable of str
        Paths of the SDF files to scan.

    Returns
    -------
    dict
        Maps file base name (no directory, no extension) to the parsed energy.

    Raises
    ------
    IndexError
        If a marker is the last line of a file.
    ValueError
        If the line after a marker is not a valid float.
    """
    marker = "M  END"
    result = {}
    for path in files:
        with open(path, 'r') as handle:
            content = list(handle)
        # positions of every molblock terminator in this file
        hits = [pos for pos, text in enumerate(content) if marker in text]
        for pos in hits:
            label = os.path.splitext(os.path.basename(path))[0]
            result[label] = float(content[pos + 1])
    return result

def find_duplicates(rms_sorted, energy, thresh=None):
    """Flag near-duplicate conformers, keeping the lower-energy one of each pair.

    Two conformers are treated as duplicates when their RMS values (second
    item of each ``rms_sorted`` entry) differ by less than ``thresh``. Of such
    a pair, the conformer with the higher energy is flagged; on equal energies
    the earlier one in the list is flagged.

    Every surviving conformer is used in turn as the reference and compared
    with all later surviving conformers inside its RMS window. When the
    reference itself is flagged, the scan restarts from the next surviving
    conformer, so no pair inside a window is skipped.

    Parameters
    ----------
    rms_sorted : sequence of (name, rms)
        Conformer names with their RMS values, sorted by ascending RMS.
    energy : mapping
        Maps conformer name to its energy.
    thresh : float, optional
        RMS-difference threshold. When omitted, the module-level
        ``rms_thresh`` variable is used, as in the original implementation.

    Returns
    -------
    list
        Names of the conformers to delete, in the order they were flagged.

    Raises
    ------
    KeyError
        If a compared conformer name is missing from ``energy``.
    """
    if thresh is None:
        # Backward compatibility: existing callers set the global instead.
        thresh = rms_thresh

    to_be_deleted = []   # keeps the order in which duplicates were found
    dropped = set()      # the same names, for fast membership tests
    count = len(rms_sorted)

    for i in range(count):
        name_i, rms_i = rms_sorted[i][0], rms_sorted[i][1]
        if name_i in dropped:
            continue
        for j in range(i + 1, count):
            name_j, rms_j = rms_sorted[j][0], rms_sorted[j][1]
            # The list is sorted, so once one entry falls outside the window
            # all following entries do as well.
            if not (rms_j - rms_i) < thresh:
                break
            if name_j in dropped:
                continue
            if energy[name_i] < energy[name_j]:
                to_be_deleted.append(name_j)
                dropped.add(name_j)
            else:
                # The reference itself is the worse one: flag it and move on
                # to the next surviving conformer as the new reference.
                to_be_deleted.append(name_i)
                dropped.add(name_i)
                break

    if to_be_deleted:
        print("Conformers which will be deleted:")
        print(to_be_deleted)
    return to_be_deleted

## Crystal structure of "M7" macrocycle

In [69]:
# Read the crystal-structure coordinates; the context manager makes sure the
# file handle is closed as soon as the text has been read.
with open('/home/gosia/work/work_on_gitlab/icho/calcs/m7/m7_crystal.xyz','r') as _xyz_file:
    cm7 = _xyz_file.read()

# Show the structure as sticks in an interactive 3D viewer.
vcm7 = py3Dmol.view(width=400,height=400)
vcm7.addModel(cm7,'xyz')
vcm7.setStyle({'stick':{}})
vcm7.setBackgroundColor('0xeeeeee')
vcm7.zoomTo()
vcm7.show()

In [70]:
# "core" is a part of a molecule, which we wish to be the "most-aligned" among multiple conformers
smiles      = 'N1C(=O)c2nc(C(=O)NCCCNC(=O)c3nc(C(=O)NCCC1)ccc3)ccc2'
core_smiles = 'C(=O)c1nc(C=O)ccc1'

# Build the molecule from its SMILES string (with explicit hydrogens added) and
# look up the atom indices of the core substructure within it.
m7 = Chem.AddHs(Chem.MolFromSmiles(smiles))
core_m7 = m7.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
# NOTE(review): core_m7 holds atom indices of the SMILES-built `m7`, yet later cells
# pass it as the reference side of the atom map when aligning onto `m7_crystal`.
# That is only valid if both molecules share the same atom ordering -- TODO confirm.

# Load the crystal structure; its first record is used as the alignment/RMS reference.
templ_m7 = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m7/m7_crystal.sdf')
m7_crystal = templ_m7[0]

## Conformers generated with the Balloon software:

Conformers were generated in two ways (genetic algorithm):

* Starting with the crystal geometry kept as a template, output: "m7_b_crystal" on the left fig. below

* Starting with the SMILES signature of M7 and allowing to "rebuild the geometry" (option --rebuildGeometry), output: "m7_b_smiles" on the right fig. below

In both cases the Balloon software was asked for 50 conformers (with other parameters set to default values) and it found 10 conformers in the former case and 7 in the latter.

In [71]:
# Paths of all SDF files in the Balloon results directory for the crystal-SDF start.
inps_m7_b_sdf = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m7/balloon/results_starting_from_crystalsdf/*.sdf')

In [72]:
# Paths of all SDF files in the Balloon results directory for the SMILES start.
inps_m7_b_smi = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m7/balloon/results_starting_from_crystalsmiles/*.sdf')

In [73]:
# Energies read from the files, keyed by file base name (without extension).
# NOTE(review): later cells look these up with names generated as "m7_b_sdf_<i>" /
# "m7_b_smi_<i>", so the file names presumably follow that pattern -- verify.
e_m7_b_sdf = grep_energies_from_sdf_outputs(inps_m7_b_sdf)
e_m7_b_smi = grep_energies_from_sdf_outputs(inps_m7_b_smi)

In [74]:
%%html
<!-- Placeholder table: the 3D viewers are inserted into the empty cells by id in later cells. -->
<table>
  <tr>
    <td id="m7_b_crystal" ></td>
    <td id="m7_b_smiles"  ></td>
  </tr>
  <tr>
    <td> m7_b_crystal </td>
    <td> m7_b_smiles  </td>  
  </tr>
</table>

0,1
,
m7_b_crystal,m7_b_smiles


In [75]:
# Load the Balloon conformers and store them in dictionaries keyed by a generated name
suppl_m7_b_sdf  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m7/balloon/m7_crystal_sdfout.sdf')
suppl_m7_b_smi  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m7/balloon/m7_crystal_smilesout.sdf')

# names are "<prefix><position in the file>"
allmol_m7_b_sdf = {"m7_b_sdf_" + str(idx): conf for idx, conf in enumerate(suppl_m7_b_sdf)}
allmol_m7_b_smi = {"m7_b_smi_" + str(idx): conf for idx, conf in enumerate(suppl_m7_b_smi)}

In [76]:
# align every Balloon conformer onto the crystal structure, matching the core atoms
core_query = Chem.MolFromSmiles(core_smiles)
for conformers in (allmol_m7_b_sdf, allmol_m7_b_smi):
    for conf in conformers.values():
        core_conf = conf.GetSubstructMatch(core_query)
        AllChem.AlignMol(conf, m7_crystal, atomMap=list(zip(core_conf, core_m7)))

In [77]:
# view:
p7_b_handles=[]

# Left-hand figure: conformers obtained when starting from the crystal geometry
p7_b_sdf = py3Dmol.view(width=400,height=400)
for key, mol in allmol_m7_b_sdf.items():
    mb = Chem.MolToMolBlock(mol)
    p7_b_sdf.addModel(mb,'sdf')
p7_b_sdf.setStyle({'stick':{'radius':'0.15'}})
p7_b_sdf.setBackgroundColor('0xeeeeee')
p7_b_sdf.zoomTo()
p7_b_handles.append(p7_b_sdf)

# Right-hand figure: conformers obtained when starting from the SMILES string.
# This must iterate the SMILES-derived set; the crystal-derived set is already
# shown in the left-hand viewer.
p7_b_smi = py3Dmol.view(width=400,height=400)
for key, mol in allmol_m7_b_smi.items():
    mb = Chem.MolToMolBlock(mol)
    p7_b_smi.addModel(mb,'sdf')
p7_b_smi.setStyle({'stick':{'radius':'0.15'}})
p7_b_smi.setBackgroundColor('0xeeeeee')
p7_b_smi.zoomTo()
p7_b_handles.append(p7_b_smi)

In [78]:
# Place the first viewer (p7_b_sdf) into the page element with id 'm7_b_crystal'.
p7_b_handles[0].insert('m7_b_crystal')

In [79]:
# Place the second viewer (p7_b_smi) into the page element with id 'm7_b_smiles'.
p7_b_handles[1].insert('m7_b_smiles')

### pre-screening

Some of the generated conformers are very much alike. To remove potential duplicates which were not "caught" by the Balloon program, we can compare the energies (preoptimized with MM) and the RMSD calculated against a reference structure (here: the crystal structure of M7).

First let's print the energies and RMS values:

In [80]:
# Pool the conformers and the energies from both Balloon runs
allmol_m7_b = dict(allmol_m7_b_sdf)
allmol_m7_b.update(allmol_m7_b_smi)

energy_m7_b = dict(e_m7_b_sdf)
energy_m7_b.update(e_m7_b_smi)

# RMS of every conformer against the crystal structure, hydrogens removed on both sides
crystal_heavy = Chem.RemoveHs(m7_crystal)
rms_m7_b = {}
for name, conf in allmol_m7_b.items():
    rms_m7_b[name] = AllChem.GetBestRMS(Chem.RemoveHs(conf), crystal_heavy)

Then we can introduce some thresholds, for instance:

* if two conformers differ by less than 0.01 in RMS (measured against the reference structure), then select the one with the lower energy

In [81]:
rms_thresh = 0.01   # read by find_duplicates
rms_sorted = sorted(rms_m7_b.items(), key=lambda item: item[1])

print("List sorted by RMS:")
for name, rms in rms_sorted:
    print("name = {}, E = {:.6f}, RMS = {:.6f}".format(name, energy_m7_b[name], rms))

# conformers whose RMS values are too close are duplicates; the higher-energy one of each pair goes
to_be_deleted = find_duplicates(rms_sorted, energy_m7_b)

# drop the flagged conformers from all three tables
for name in to_be_deleted:
    for table in (allmol_m7_b, energy_m7_b, rms_m7_b):
        del table[name]

List sorted by RMS:
name = m7_b_sdf_1, E = 51.783164, RMS = 0.209230
name = m7_b_sdf_0, E = 48.372890, RMS = 0.593787
name = m7_b_smi_0, E = 54.675946, RMS = 0.844525
name = m7_b_smi_2, E = 62.770633, RMS = 1.346777
name = m7_b_sdf_8, E = 57.683850, RMS = 1.363563
name = m7_b_sdf_9, E = 58.104823, RMS = 1.380051
name = m7_b_smi_5, E = 64.114107, RMS = 1.414668
name = m7_b_smi_4, E = 63.756299, RMS = 1.660194
name = m7_b_sdf_6, E = 56.106820, RMS = 1.799433
name = m7_b_smi_1, E = 60.504327, RMS = 1.803571
name = m7_b_sdf_3, E = 53.693431, RMS = 1.840951
name = m7_b_sdf_4, E = 55.531843, RMS = 1.877492
name = m7_b_smi_3, E = 63.179995, RMS = 1.917011
name = m7_b_sdf_7, E = 57.536989, RMS = 1.921691
name = m7_b_sdf_2, E = 52.100671, RMS = 2.007561
name = m7_b_sdf_5, E = 55.688970, RMS = 2.016376
name = m7_b_smi_6, E = 64.617063, RMS = 2.053687
Conformers which will be deleted:
['m7_b_smi_1', 'm7_b_smi_3', 'm7_b_sdf_5']


Below we will align the selected conformers:

In [82]:
# Align the remaining Balloon conformers onto the crystal structure, matching the core atoms
core_query = Chem.MolFromSmiles(core_smiles)
for conf in allmol_m7_b.values():
    core_conf = conf.GetSubstructMatch(core_query)
    AllChem.AlignMol(conf, m7_crystal, atomMap=list(zip(core_conf, core_m7)))

# Overlay all of them in one viewer
p_b = py3Dmol.view(width=400,height=400)
for conf in allmol_m7_b.values():
    p_b.addModel(Chem.MolToMolBlock(conf),'sdf')
p_b.setStyle({'stick':{'radius':'0.15'}})
p_b.setBackgroundColor('0xeeeeee')
p_b.zoomTo()
p_b.show()

## Conformers generated with the RDKit software:

RDKit found 6 conformers of similar energy:

In [83]:
# create a list of all structures to be aligned
# (paths of every SDF file in the two RDKit results directories)
inps_m7_rdkit_smi = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m7/rdkit/results_crystal_from_smiles/*.sdf')
inps_m7_rdkit_sdf = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m7/rdkit/results_crystal_from_sdf/*.sdf')

In [84]:
# Energies read from the files, keyed by file base name (without extension).
# NOTE(review): later cells look these up with names generated as "m7_rdkit_smi_<i>" /
# "m7_rdkit_sdf_<i>", so the file names presumably follow that pattern -- verify.
e_m7_rdkit_smi = grep_energies_from_sdf_outputs(inps_m7_rdkit_smi)
e_m7_rdkit_sdf = grep_energies_from_sdf_outputs(inps_m7_rdkit_sdf)

In [85]:
# Load the RDKit conformers and store them in dictionaries keyed by a generated name
suppl_m7_rdkit_smi  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m7/rdkit/result_smiles.sdf')
suppl_m7_rdkit_sdf  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m7/rdkit/result_sdf.sdf')

# names are "<prefix><position in the file>"
allmol_m7_rdkit_smi = {"m7_rdkit_smi_" + str(idx): conf for idx, conf in enumerate(suppl_m7_rdkit_smi)}
allmol_m7_rdkit_sdf = {"m7_rdkit_sdf_" + str(idx): conf for idx, conf in enumerate(suppl_m7_rdkit_sdf)}

In [86]:
# align every RDKit conformer onto the crystal structure, matching the core atoms
core_query = Chem.MolFromSmiles(core_smiles)
for conformers in (allmol_m7_rdkit_smi, allmol_m7_rdkit_sdf):
    for conf in conformers.values():
        core_conf = conf.GetSubstructMatch(core_query)
        AllChem.AlignMol(conf, m7_crystal, atomMap=list(zip(core_conf, core_m7)))

In [87]:
%%html
<!-- Placeholder table: the 3D viewers are inserted into the empty cells by id in later cells. -->
<table>
  <tr>
    <td id="m7_rdkit_crystal" ></td>
    <td id="m7_rdkit_smiles"  ></td>
  </tr>
  <tr>
    <td> m7_rdkit_crystal </td>
    <td> m7_rdkit_smiles  </td>  
  </tr>
</table>

0,1
,
m7_rdkit_crystal,m7_rdkit_smiles


In [88]:
# view: one viewer per RDKit run, collected in a list for later insertion into the page

# conformers from the SDF-based run
p7_rdkit_sdf = py3Dmol.view(width=400,height=400)
p7_rdkit_sdf.removeAllModels()
for conf in allmol_m7_rdkit_sdf.values():
    p7_rdkit_sdf.addModel(Chem.MolToMolBlock(conf),'sdf')
p7_rdkit_sdf.setStyle({'stick':{'radius':'0.15'}})
p7_rdkit_sdf.setBackgroundColor('0xeeeeee')
p7_rdkit_sdf.zoomTo()

# conformers from the SMILES-based run
p7_rdkit_smi = py3Dmol.view(width=400,height=400)
p7_rdkit_smi.removeAllModels()
for conf in allmol_m7_rdkit_smi.values():
    p7_rdkit_smi.addModel(Chem.MolToMolBlock(conf),'sdf')
p7_rdkit_smi.setStyle({'stick':{'radius':'0.15'}})
p7_rdkit_smi.setBackgroundColor('0xeeeeee')
p7_rdkit_smi.zoomTo()

p7_rdkit_handles = [p7_rdkit_sdf, p7_rdkit_smi]

In [89]:
# Place the first viewer (p7_rdkit_sdf) into the page element with id 'm7_rdkit_crystal'.
p7_rdkit_handles[0].insert('m7_rdkit_crystal')

In [90]:
# Place the second viewer (p7_rdkit_smi) into the page element with id 'm7_rdkit_smiles'.
p7_rdkit_handles[1].insert('m7_rdkit_smiles')

### pre-screening

In [91]:
# Pool the conformers and the energies from both RDKit runs
allmol_m7_rdkit = dict(allmol_m7_rdkit_sdf)
allmol_m7_rdkit.update(allmol_m7_rdkit_smi)

energy_m7_rdkit = dict(e_m7_rdkit_sdf)
energy_m7_rdkit.update(e_m7_rdkit_smi)

# RMS of every conformer against the crystal structure, hydrogens removed on both sides
crystal_heavy = Chem.RemoveHs(m7_crystal)
rms_m7_rdkit = {}
for name, conf in allmol_m7_rdkit.items():
    rms_m7_rdkit[name] = AllChem.GetBestRMS(Chem.RemoveHs(conf), crystal_heavy)

In [92]:
rms_thresh = 0.01   # read by find_duplicates
rms_sorted = sorted(rms_m7_rdkit.items(), key=lambda item: item[1])

print("List sorted by RMS:")
for name, rms in rms_sorted:
    print("name = {}, E = {:.6f}, RMS = {:.6f}".format(name, energy_m7_rdkit[name], rms))

# conformers whose RMS values are too close are duplicates; the higher-energy one of each pair goes
to_be_deleted = find_duplicates(rms_sorted, energy_m7_rdkit)

# drop the flagged conformers from all three tables
for name in to_be_deleted:
    for table in (allmol_m7_rdkit, energy_m7_rdkit, rms_m7_rdkit):
        del table[name]

List sorted by RMS:
name = m7_rdkit_smi_24, E = 52.620000, RMS = 1.062678
name = m7_rdkit_smi_3, E = 60.870000, RMS = 1.669831
name = m7_rdkit_smi_34, E = 60.970000, RMS = 1.673081
name = m7_rdkit_smi_9, E = 55.120000, RMS = 1.727142
name = m7_rdkit_sdf_0, E = 43.290000, RMS = 1.729277
name = m7_rdkit_smi_18, E = 57.730000, RMS = 1.756237
name = m7_rdkit_smi_26, E = 56.760000, RMS = 1.769920
name = m7_rdkit_smi_33, E = 58.070000, RMS = 1.791185
name = m7_rdkit_sdf_13, E = 41.570000, RMS = 1.811507
name = m7_rdkit_sdf_12, E = 46.100000, RMS = 1.820971
name = m7_rdkit_smi_6, E = 58.780000, RMS = 1.866204
name = m7_rdkit_smi_20, E = 57.100000, RMS = 1.887489
name = m7_rdkit_smi_36, E = 56.760000, RMS = 1.887921
name = m7_rdkit_smi_16, E = 58.080000, RMS = 1.909533
name = m7_rdkit_smi_31, E = 56.160000, RMS = 1.913987
name = m7_rdkit_smi_28, E = 60.530000, RMS = 1.934436
name = m7_rdkit_sdf_11, E = 43.590000, RMS = 1.947649
name = m7_rdkit_smi_10, E = 57.310000, RMS = 1.962343
name = m7_rd

Below we will align the selected conformers:

In [93]:
# Align the remaining RDKit conformers onto the crystal structure, matching the core atoms
core_query = Chem.MolFromSmiles(core_smiles)
for conf in allmol_m7_rdkit.values():
    core_conf = conf.GetSubstructMatch(core_query)
    AllChem.AlignMol(conf, m7_crystal, atomMap=list(zip(core_conf, core_m7)))

# Overlay all of them in one viewer
p_r = py3Dmol.view(width=400,height=400)
for conf in allmol_m7_rdkit.values():
    p_r.addModel(Chem.MolToMolBlock(conf),'sdf')
p_r.setStyle({'stick':{'radius':'0.15'}})
p_r.setBackgroundColor('0xeeeeee')
p_r.zoomTo()
p_r.show()

## Summary

Now let's generate a list of all conformers (from all programs used, as presented above). We can further pre-screen all the structures and remove potential duplicates. Here we can also use a cruder threshold.

In [94]:
# Pool the pre-screened conformers and energies from Balloon and RDKit
allmol_m7 = dict(allmol_m7_b)
allmol_m7.update(allmol_m7_rdkit)

energy_m7 = dict(energy_m7_b)
energy_m7.update(energy_m7_rdkit)

# RMS of every conformer against the crystal structure, hydrogens removed on both sides
crystal_heavy = Chem.RemoveHs(m7_crystal)
rms_m7 = {}
for name, conf in allmol_m7.items():
    rms_m7[name] = AllChem.GetBestRMS(Chem.RemoveHs(conf), crystal_heavy)

In [95]:
rms_thresh = 0.01   # read by find_duplicates
rms_sorted = sorted(rms_m7.items(), key=lambda item: item[1])

# conformers whose RMS values are too close are duplicates; the higher-energy one of each pair goes
to_be_deleted = find_duplicates(rms_sorted, energy_m7)

# drop the flagged conformers from all three tables
for name in to_be_deleted:
    for table in (allmol_m7, energy_m7, rms_m7):
        del table[name]

Conformers which will be deleted:
['m7_b_smi_4', 'm7_rdkit_smi_33', 'm7_b_sdf_7', 'm7_rdkit_smi_14']


Finally we can align all conformers which will further be used as starting points in DFT geometry optimizations:

In [96]:
# Align the final selection onto the crystal structure, matching the core atoms
core_query = Chem.MolFromSmiles(core_smiles)
for conf in allmol_m7.values():
    core_conf = conf.GetSubstructMatch(core_query)
    AllChem.AlignMol(conf, m7_crystal, atomMap=list(zip(core_conf, core_m7)))

# Overlay all of them in one viewer
p = py3Dmol.view(width=400,height=400)
for conf in allmol_m7.values():
    p.addModel(Chem.MolToMolBlock(conf),'sdf')
p.setStyle({'stick':{'radius':'0.15'}})
p.setBackgroundColor('0xeeeeee')
p.zoomTo()
p.show()

Write the selected conformers' names to the list "list_selected_conformers_from_balloon_rdkit". It will be used to generate Gaussian inputs:

In [97]:
# Write one selected conformer name per line.
# NOTE(review): the file name below spells "ballon" while the accompanying text says
# "balloon"; confirm which spelling the Gaussian-input generation expects before changing either.
with open("/home/gosia/work/work_on_gitlab/icho/calcs/m7/list_selected_conformers_from_ballon_rdkit", "w") as f:
    f.writelines(name + "\n" for name in allmol_m7)