# Exploring conformational space of selected macrocycles - "M1"

In this notebook we present and analyze selected structures; technical notes are available [here](https://www.gitlab.com/user/gosia/icho).

In [163]:
from IPython.display import HTML

# Inject a small script plus a button that toggles the visibility of the
# notebook's code-input areas (the 'div.input' elements).
# code_toggle is registered to run once the document is ready; because
# code_show starts as true, that first call hides the inputs.
# NOTE(review): the script uses `$`, so it presumably relies on jQuery being
# provided by the notebook front end - confirm for the target viewer.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [164]:
import glob
import py3Dmol

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem import rdMolAlign
from rdkit.Chem.Draw import IPythonConsole
from rdkit import rdBase
print(rdBase.rdkitVersion)
import os,time
print( time.asctime())

2016.09.4
Fri Apr  7 13:06:19 2017


In [165]:
# Functions used in this notebook:

def grep_energies_from_sdf_outputs(files):
    """Collect one energy value per SD file.

    For every path in ``files`` the line that directly follows a line
    containing "M  END" is converted to ``float``. The value is stored under
    the file's base name without extension; if a file contains several
    "M  END" lines, the last one wins.

    Parameters
    ----------
    files : iterable of str
        Paths of the files to scan.

    Returns
    -------
    dict
        Maps base name (no directory, no extension) to the energy as float.
        Files without an "M  END" line contribute no entry.

    Raises
    ------
    IndexError
        If "M  END" is on the last line of a file.
    ValueError
        If the line after "M  END" cannot be parsed as a float.
    """
    energies = {}
    for path in files:
        label = os.path.splitext(os.path.basename(path))[0]
        with open(path, 'r') as handle:
            content = handle.readlines()
        for idx in range(len(content)):
            if "M  END" in content[idx]:
                energies[label] = float(content[idx + 1])
    return energies

def find_duplicates(rms_sorted, energy, thresh=None):
    """Flag near-duplicate conformers in a list sorted by RMS.

    The list is walked once in ascending RMS order. Each conformer is compared
    with the most recent conformer that is still kept; if their RMS values
    differ by less than the threshold, the one with the higher energy is
    flagged (on an energy tie the earlier one is flagged).

    The earlier two-index implementation advanced both indices together after
    the current conformer lost a comparison, so candidates could be skipped
    without ever being compared. Tracking the last survivor avoids that.

    Parameters
    ----------
    rms_sorted : sequence of (name, rms)
        Conformer names with their RMS values, sorted by ascending RMS.
    energy : dict
        Maps conformer name to its energy.
    thresh : float, optional
        RMS difference below which two conformers count as duplicates.
        When omitted, the module-level ``rms_thresh`` is used, which keeps
        existing two-argument calls working.

    Returns
    -------
    list
        Names of the conformers to delete, in the order they were flagged.
    """
    if thresh is None:
        # Backward-compatible fallback: the calling cells set a global
        # ``rms_thresh`` before invoking this function.
        thresh = rms_thresh

    to_be_deleted = []
    survivor = None  # (name, rms) of the latest conformer that is still kept
    for entry in rms_sorted:
        name, rms = entry[0], entry[1]
        if survivor is not None and (rms - survivor[1]) < thresh:
            if energy[survivor[0]] < energy[name]:
                # The kept conformer wins; it stays the comparison partner.
                to_be_deleted.append(name)
                continue
            # The newcomer wins (or ties) and replaces the previous survivor.
            to_be_deleted.append(survivor[0])
        survivor = (name, rms)

    if to_be_deleted:
        print("Conformers which will be deleted:")
        print(to_be_deleted)
    return to_be_deleted

## Crystal structure of "M1" macrocycle

In [166]:
# Read the crystal-structure XYZ file; the context manager closes the file
# handle as soon as the text has been read (the bare open().read() left it open).
with open('/home/gosia/work/work_on_gitlab/icho/calcs/m1/m1_crystal.xyz','r') as xyz_file:
    cm1 = xyz_file.read()

# Show the crystal structure as sticks in a 400x400 viewer
vcm1 = py3Dmol.view(width=400,height=400)
vcm1.removeAllModels()
vcm1.addModel(cm1,'xyz')
vcm1.setStyle({'stick':{'radius':0.15,'color':'spectrum'}})
vcm1.setBackgroundColor('0xeeeeee')
vcm1.zoomTo()
vcm1.show()

In [167]:
# "core" is the part of the molecule that should be the "most-aligned" among multiple conformers
smiles      = 'O=C1NCCNC(=O)c2nc(C(=O)NCCNC(=O)c3nc1ccc3)ccc2'
core_smiles = 'n1ccccc1'

m1 = Chem.AddHs(Chem.MolFromSmiles(smiles))

# Reference geometry: first record of the crystal-structure SD file
templ_m1 = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1/balloon/m1_crystal.sdf')
m1_crystal = templ_m1[0]

# core_m1 is used in later cells as the reference side of AlignMol's atomMap,
# with m1_crystal as the reference molecule. The indices therefore have to be
# atom indices of m1_crystal; matching on the SMILES-built m1 is only correct
# if both molecules happen to share the same atom ordering.
core_m1 = m1_crystal.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))

## Conformers generated with the Balloon software:

Conformers were generated in two ways (genetic algorithm):

* Starting with the crystal geometry kept as a template, output: "m1_b_crystal" on the left fig. below

* Starting from the SMILES string of M1 and allowing Balloon to rebuild the geometry (option --rebuildGeometry), output: "m1_b_smiles" on the right fig. below

In both cases the Balloon software was asked for 50 conformers (with other parameters set to default values) and it found 6 conformers in the former case and 7 in the latter.

In [168]:
# Paths of all .sdf files produced by the Balloon run that started from the crystal SDF
inps_m1_b_sdf = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1/balloon/results_starting_from_crystalsdf/*.sdf')

In [169]:
# Paths of all .sdf files produced by the Balloon run that started from the SMILES string
inps_m1_b_smi = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1/balloon/results_starting_from_crystalsmiles/*.sdf')

In [170]:
# Energy per conformer file, keyed by file base name.
# NOTE(review): later cells look these energies up with the generated names
# "m1_b_sdf_<i>" / "m1_b_smi_<i>", so the file names presumably follow that
# scheme - confirm against the results directories.
e_m1_b_sdf = grep_energies_from_sdf_outputs(inps_m1_b_sdf)
e_m1_b_smi = grep_energies_from_sdf_outputs(inps_m1_b_smi)

In [171]:
%%html
<!-- Two-column layout: the empty cells in the first row are placeholders,
     addressed by id, into which the viewers are inserted by later cells;
     the second row holds the captions. -->
<table>
  <tr>
    <td id="m1_b_crystal" ></td>
    <td id="m1_b_smiles"  ></td>
  </tr>
  <tr>
    <td> m1_b_crystal </td>
    <td> m1_b_smiles  </td>
  </tr>
</table>

0,1
,
m1_b_crystal,m1_b_smiles


In [172]:
# Read both multi-conformer SD files and index every conformer by a generated
# name: "m1_b_sdf_<i>" / "m1_b_smi_<i>", where <i> is the position in the file.
suppl_m1_b_sdf  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1/balloon/m1_crystal_sdfout.sdf')
suppl_m1_b_smi  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1/balloon/m1_crystal_smilesout.sdf')

allmol_m1_b_sdf = {
    "m1_b_sdf_" + str(idx): conformer
    for idx, conformer in enumerate(suppl_m1_b_sdf)
}
allmol_m1_b_smi = {
    "m1_b_smi_" + str(idx): conformer
    for idx, conformer in enumerate(suppl_m1_b_smi)
}

In [173]:
# align: superimpose every Balloon conformer on the crystal structure, pairing
# the conformer's core atoms with the reference core atoms (core_m1).
core_query = Chem.MolFromSmiles(core_smiles)
for conformers in (allmol_m1_b_sdf, allmol_m1_b_smi):
    for mol in conformers.values():
        core_mol = mol.GetSubstructMatch(core_query)
        AllChem.AlignMol(mol, m1_crystal, atomMap=list(zip(core_mol, core_m1)))

In [174]:
# view: build one py3Dmol viewer per Balloon run. Both are collected in
# p1_b_handles so that later cells can place them in the HTML table by
# position (index 0 = run started from the crystal SDF, index 1 = from SMILES).
p1_b_sdf = py3Dmol.view(width=400,height=400)
p1_b_smi = py3Dmol.view(width=400,height=400)

for view, conformers in ((p1_b_sdf, allmol_m1_b_sdf), (p1_b_smi, allmol_m1_b_smi)):
    for mol in conformers.values():
        view.addModel(Chem.MolToMolBlock(mol),'sdf')
    # The stick radius is passed as a number (it was the string '0.15'),
    # matching the crystal-structure view earlier in the notebook.
    view.setStyle({'stick':{'radius':0.15}})
    view.setBackgroundColor('0xeeeeee')
    view.zoomTo()

p1_b_handles = [p1_b_sdf, p1_b_smi]

In [175]:
# Place the first viewer into the page element with id 'm1_b_crystal'
# (the left table cell declared in the %%html cell above).
p1_b_handles[0].insert('m1_b_crystal')

In [176]:
# Place the second viewer into the page element with id 'm1_b_smiles'
# (the right table cell declared in the %%html cell above).
p1_b_handles[1].insert('m1_b_smiles')

### pre-screening

Some of the generated conformers are very much alike. To remove potential duplicates which were not "caught" by the Balloon program, we can compare the energies (preoptimized with MM) and the RMSD calculated against a reference structure (here: the crystal structure of M1). The choice of the reference structure is not essential, since we are only interested in the relative RMS differences between the conformers. Note that this is a heuristic: two conformers with nearly equal RMSD to the reference are likely, but not guaranteed, to be the same structure.

First let's print the energies and RMS values:

In [177]:
# Pool the two Balloon runs: conformers first, then their energies
allmol_m1_b = dict(allmol_m1_b_sdf)
allmol_m1_b.update(allmol_m1_b_smi)

energy_m1_b = dict(e_m1_b_sdf)
energy_m1_b.update(e_m1_b_smi)

# RMS of every pooled conformer against the crystal structure, computed on
# hydrogen-stripped copies of both molecules
rms_m1_b = {
    name: AllChem.GetBestRMS(Chem.RemoveHs(conformer), Chem.RemoveHs(m1_crystal))
    for name, conformer in allmol_m1_b.items()
}

Then we can introduce some thresholds, for instance:

* if two conformers differ by less than 0.01 in RMS (measured against the reference structure), then select the one with the lower energy

In [178]:
rms_thresh = 0.01  # read as a global by find_duplicates
rms_sorted = sorted(rms_m1_b.items(), key=lambda pair: pair[1])

print("List sorted by RMS:")
for name, rms in rms_sorted:
    print("name = {}, E = {:.6f}, RMS = {:.6f}".format(name, energy_m1_b[name], rms))

# Flag conformers whose RMS is too close to a neighbour's; of each such pair
# the one with the higher energy is dropped from all three dictionaries.
to_be_deleted = find_duplicates(rms_sorted, energy_m1_b)

for name in to_be_deleted:
    for table in (allmol_m1_b, energy_m1_b, rms_m1_b):
        del table[name]

List sorted by RMS:
name = m1_b_smi_4, E = 54.259997, RMS = 0.382989
name = m1_b_smi_3, E = 54.028468, RMS = 0.408258
name = m1_b_sdf_1, E = 53.583766, RMS = 0.517415
name = m1_b_sdf_3, E = 55.441273, RMS = 0.527526
name = m1_b_sdf_0, E = 53.245621, RMS = 0.662122
name = m1_b_smi_0, E = 53.280464, RMS = 0.756497
name = m1_b_smi_5, E = 57.332921, RMS = 0.770531
name = m1_b_sdf_4, E = 57.760058, RMS = 0.852633
name = m1_b_sdf_5, E = 62.424468, RMS = 0.880096
name = m1_b_smi_2, E = 53.919798, RMS = 0.894811
name = m1_b_sdf_2, E = 53.651385, RMS = 1.174442
name = m1_b_smi_1, E = 53.729899, RMS = 1.205705
name = m1_b_smi_6, E = 63.187321, RMS = 1.239890


Below we will align the selected conformers:

In [179]:
# Align the conformers that survived the pre-screening on the core atoms.
# The query molecule is built once instead of on every iteration.
core_query = Chem.MolFromSmiles(core_smiles)
for mol in allmol_m1_b.values():
    core_mol = mol.GetSubstructMatch(core_query)
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))

# Show all of them overlaid in a single viewer
p_b = py3Dmol.view(width=400,height=400)
for mol in allmol_m1_b.values():
    p_b.addModel(Chem.MolToMolBlock(mol),'sdf')
# The stick radius is passed as a number (it was the string '0.15'),
# matching the crystal-structure view earlier in the notebook.
p_b.setStyle({'stick':{'radius':0.15}})
p_b.setBackgroundColor('0xeeeeee')
p_b.zoomTo()
p_b.show()

## Conformers generated with the RDKit software:

In [180]:
# Paths of all .sdf files from the two RDKit runs (started from SMILES / from the crystal SDF)
inps_m1_rdkit_smi = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/results_crystal_from_smiles/*.sdf')
inps_m1_rdkit_sdf = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/results_crystal_from_sdf/*.sdf')

In [181]:
# Energy per conformer file, keyed by file base name.
# NOTE(review): later cells look these energies up with the generated names
# "m1_rdkit_smi_<i>" / "m1_rdkit_sdf_<i>", so the file names presumably follow
# that scheme - confirm against the results directories.
e_m1_rdkit_smi = grep_energies_from_sdf_outputs(inps_m1_rdkit_smi)
e_m1_rdkit_sdf = grep_energies_from_sdf_outputs(inps_m1_rdkit_sdf)

In [182]:
# Read both multi-conformer SD files and index every conformer by a generated
# name: "m1_rdkit_smi_<i>" / "m1_rdkit_sdf_<i>", where <i> is the position in the file.
suppl_m1_rdkit_smi  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/result_smiles.sdf')
suppl_m1_rdkit_sdf  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/result_sdf.sdf')

allmol_m1_rdkit_smi = {
    "m1_rdkit_smi_" + str(idx): conformer
    for idx, conformer in enumerate(suppl_m1_rdkit_smi)
}
allmol_m1_rdkit_sdf = {
    "m1_rdkit_sdf_" + str(idx): conformer
    for idx, conformer in enumerate(suppl_m1_rdkit_sdf)
}

In [183]:
# align: superimpose every RDKit conformer on the crystal structure, pairing
# the conformer's core atoms with the reference core atoms (core_m1).
core_query = Chem.MolFromSmiles(core_smiles)
for conformers in (allmol_m1_rdkit_smi, allmol_m1_rdkit_sdf):
    for mol in conformers.values():
        core_mol = mol.GetSubstructMatch(core_query)
        AllChem.AlignMol(mol, m1_crystal, atomMap=list(zip(core_mol, core_m1)))

In [184]:
%%html
<!-- Two-column layout: the empty cells in the first row are placeholders,
     addressed by id, into which the viewers are inserted by later cells;
     the second row holds the captions. -->
<table>
  <tr>
    <td id="m1_rdkit_crystal" ></td>
    <td id="m1_rdkit_smiles"  ></td>
  </tr>
  <tr>
    <td> m1_rdkit_crystal </td>
    <td> m1_rdkit_smiles  </td>
  </tr>
</table>

0,1
,
m1_rdkit_crystal,m1_rdkit_smiles


In [185]:
# view: build one py3Dmol viewer per RDKit run. Both are collected in
# p1_rdkit_handles so that later cells can place them in the HTML table by
# position (index 0 = run started from the crystal SDF, index 1 = from SMILES).
p1_rdkit_sdf = py3Dmol.view(width=400,height=400)
p1_rdkit_smi = py3Dmol.view(width=400,height=400)

for view, conformers in ((p1_rdkit_sdf, allmol_m1_rdkit_sdf), (p1_rdkit_smi, allmol_m1_rdkit_smi)):
    view.removeAllModels()
    for mol in conformers.values():
        view.addModel(Chem.MolToMolBlock(mol),'sdf')
    # The stick radius is passed as a number (it was the string '0.15'),
    # matching the crystal-structure view earlier in the notebook.
    view.setStyle({'stick':{'radius':0.15}})
    view.setBackgroundColor('0xeeeeee')
    view.zoomTo()

p1_rdkit_handles = [p1_rdkit_sdf, p1_rdkit_smi]

In [186]:
# Place the first viewer into the page element with id 'm1_rdkit_crystal'
# (the left table cell declared in the %%html cell above).
p1_rdkit_handles[0].insert('m1_rdkit_crystal')

In [187]:
# Place the second viewer into the page element with id 'm1_rdkit_smiles'
# (the right table cell declared in the %%html cell above).
p1_rdkit_handles[1].insert('m1_rdkit_smiles')

### pre-screening

In [188]:
# Pool the two RDKit runs: conformers first, then their energies
allmol_m1_rdkit = dict(allmol_m1_rdkit_sdf)
allmol_m1_rdkit.update(allmol_m1_rdkit_smi)

energy_m1_rdkit = dict(e_m1_rdkit_sdf)
energy_m1_rdkit.update(e_m1_rdkit_smi)

# RMS of every pooled conformer against the crystal structure, computed on
# hydrogen-stripped copies of both molecules
rms_m1_rdkit = {
    name: AllChem.GetBestRMS(Chem.RemoveHs(conformer), Chem.RemoveHs(m1_crystal))
    for name, conformer in allmol_m1_rdkit.items()
}

In [189]:
rms_thresh = 0.01  # read as a global by find_duplicates
rms_sorted = sorted(rms_m1_rdkit.items(), key=lambda pair: pair[1])

print("List sorted by RMS:")
for name, rms in rms_sorted:
    print("name = {}, E = {:.6f}, RMS = {:.6f}".format(name, energy_m1_rdkit[name], rms))

# Flag conformers whose RMS is too close to a neighbour's; of each such pair
# the one with the higher energy is dropped from all three dictionaries.
to_be_deleted = find_duplicates(rms_sorted, energy_m1_rdkit)

for name in to_be_deleted:
    for table in (allmol_m1_rdkit, energy_m1_rdkit, rms_m1_rdkit):
        del table[name]

List sorted by RMS:
name = m1_rdkit_sdf_7, E = 46.330000, RMS = 0.852154
name = m1_rdkit_sdf_6, E = 46.330000, RMS = 0.861305
name = m1_rdkit_smi_12, E = 53.870000, RMS = 0.961759
name = m1_rdkit_smi_28, E = 57.820000, RMS = 1.114038
name = m1_rdkit_smi_23, E = 57.190000, RMS = 1.208917
name = m1_rdkit_smi_24, E = 61.810000, RMS = 1.269369
name = m1_rdkit_smi_13, E = 60.350000, RMS = 1.280484
name = m1_rdkit_smi_27, E = 60.350000, RMS = 1.315356
name = m1_rdkit_smi_20, E = 53.140000, RMS = 1.321991
name = m1_rdkit_sdf_0, E = 47.220000, RMS = 1.336265
name = m1_rdkit_smi_31, E = 54.060000, RMS = 1.337848
name = m1_rdkit_smi_25, E = 59.930000, RMS = 1.365630
name = m1_rdkit_smi_11, E = 53.870000, RMS = 1.392931
name = m1_rdkit_smi_14, E = 58.070000, RMS = 1.396586
name = m1_rdkit_smi_18, E = 58.070000, RMS = 1.396645
name = m1_rdkit_smi_34, E = 58.200000, RMS = 1.401170
name = m1_rdkit_smi_8, E = 58.070000, RMS = 1.417605
name = m1_rdkit_smi_1, E = 58.200000, RMS = 1.421473
name = m1_rdk

Below we will align the selected conformers:

In [190]:
# Align the conformers that survived the pre-screening on the core atoms.
# The query molecule is built once instead of on every iteration.
core_query = Chem.MolFromSmiles(core_smiles)
for mol in allmol_m1_rdkit.values():
    core_mol = mol.GetSubstructMatch(core_query)
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))

# Show all of them overlaid in a single viewer
p_r = py3Dmol.view(width=400,height=400)
for mol in allmol_m1_rdkit.values():
    p_r.addModel(Chem.MolToMolBlock(mol),'sdf')
# The stick radius is passed as a number (it was the string '0.15'),
# matching the crystal-structure view earlier in the notebook.
p_r.setStyle({'stick':{'radius':0.15}})
p_r.setBackgroundColor('0xeeeeee')
p_r.zoomTo()
p_r.show()

## Summary

Now let's generate a list of all conformers (from all programs used, as presented above). We can further pre-screen all the structures and remove potential duplicates. Here we can also use a cruder threshold.

In [191]:
# Pool the pre-screened Balloon and RDKit sets: conformers first, then energies
allmol_m1 = dict(allmol_m1_b)
allmol_m1.update(allmol_m1_rdkit)

energy_m1 = dict(energy_m1_b)
energy_m1.update(energy_m1_rdkit)

# RMS of every pooled conformer against the crystal structure, computed on
# hydrogen-stripped copies of both molecules
rms_m1 = {
    name: AllChem.GetBestRMS(Chem.RemoveHs(conformer), Chem.RemoveHs(m1_crystal))
    for name, conformer in allmol_m1.items()
}

In [192]:
# NOTE(review): the accompanying text mentions a cruder threshold for this
# final screening, but the value is the same 0.01 used for the per-program
# screenings - confirm which one is intended.
rms_thresh = 0.01  # read as a global by find_duplicates
rms_sorted = sorted(rms_m1.items(), key=lambda pair: pair[1])

# Flag conformers whose RMS is too close to a neighbour's; of each such pair
# the one with the higher energy is dropped from all three dictionaries.
to_be_deleted = find_duplicates(rms_sorted, energy_m1)

for name in to_be_deleted:
    for table in (allmol_m1, energy_m1, rms_m1):
        del table[name]

Conformers which will be deleted:
['m1_b_sdf_4', 'm1_rdkit_smi_23', 'm1_rdkit_sdf_4']


Finally we can align all conformers which will further be used as starting points in DFT geometry optimizations:

In [193]:
# Align the final selection of conformers on the core atoms.
# The query molecule is built once instead of on every iteration.
core_query = Chem.MolFromSmiles(core_smiles)
for mol in allmol_m1.values():
    core_mol = mol.GetSubstructMatch(core_query)
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))

# Show all of them overlaid in a single viewer
p = py3Dmol.view(width=400,height=400)
for mol in allmol_m1.values():
    p.addModel(Chem.MolToMolBlock(mol),'sdf')
# The stick radius is passed as a number (it was the string '0.15'),
# matching the crystal-structure view earlier in the notebook.
p.setStyle({'stick':{'radius':0.15}})
p.setBackgroundColor('0xeeeeee')
p.zoomTo()
p.show()

Write the names of the selected conformers to the file "list_selected_conformers_from_ballon_rdkit" (note the spelling "ballon" in the file name actually written below). This file will be used to generate the Gaussian inputs:

In [196]:
# Write one conformer name per line, in the dictionary's iteration order.
# NOTE(review): the file name spells "ballon"; it is kept as is because other
# tooling may read this exact path.
with open("/home/gosia/work/work_on_gitlab/icho/calcs/m1/list_selected_conformers_from_ballon_rdkit", "w") as f:
    f.writelines(key + "\n" for key in allmol_m1)