# Exploring conformational space of "M7+1H2O"

In [1]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [1]:
import glob
import py3Dmol

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem import rdMolAlign
from rdkit.Chem.Draw import IPythonConsole
from rdkit import rdBase
print(rdBase.rdkitVersion)
import os,time
print( time.asctime())

2016.09.4
Thu Apr 27 11:08:58 2017


In [2]:
# Functions used in this notebook:

def grep_energies_from_sdf_outputs(files):
    energies = {}
    for inp in files:
        with open(inp,'r') as f:
            lines = f.readlines()
            for i, line in enumerate(lines):
                if "M  END" in line:
                    energies[os.path.splitext(os.path.basename(inp))[0]] = float(lines[i+1])
    return energies

def find_duplicates(rms_sorted, energy, rms_thresh):
    i = 0
    to_be_deleted = []
    while i < len(rms_sorted):
        j = i + 1
        while j < len(rms_sorted):
            if rms_sorted[i][0] in to_be_deleted:
                i = i + 1
                j = j + 1
            elif rms_sorted[j][0] in to_be_deleted:
                j = j + 1
            else:
                rms1 = rms_sorted[i][1]
                rms2 = rms_sorted[j][1]
                if (rms2 - rms1) < rms_thresh:
                    if energy[rms_sorted[i][0]] < energy[rms_sorted[j][0]]:
                        to_be_deleted.append(rms_sorted[j][0])
                    else:
                        to_be_deleted.append(rms_sorted[i][0])
                else:
                    break
        i = i + 1
    if to_be_deleted:
        print("Conformers which will be deleted:")    
        print(to_be_deleted)
    return to_be_deleted

In [9]:
# useful for later:
# "core" is a part of a molecule, which we wish to be the "most-aligned" among multiple conformers
smiles      = 'N1C(=O)c2nc(C(=O)NCCCNC(=O)c3nc(C(=O)NCCC1)ccc3)ccc2'
core_smiles = 'C(=O)c1nc(C=O)ccc1'

m7 = Chem.AddHs(Chem.MolFromSmiles(smiles))
core_m7 = m1.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))

templ_m7 = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m7/m7_crystal.sdf')
m7_crystal = templ_m7[0]

## M7 + H2O

M7 + H2O structures were generated from the crystal geometry of isolated M7 macrocycle, to which we have added :

* 1 H2O molecule "inside" the macrocycle ("m7_h2o_in", left fig. below)

* 1 H2O molecule "outside" the macrocycle ("m7_h2o_out", right fig. below)

manually in Avogadro software; then pre-optimized with MM implemented in Avogadro.

On figures below we present already pre-optimized "m7 + h2o" complexes, which serve as starting geometries for exploration of the conformational space.

In [10]:
%%html
<table>
  <tr>
    <td id="m7_h2o_in" ></td>
    <td id="m7_h2o_out" ></td>
  <tr>
    <td> m7_h2o_in </td>
    <td> m7_h2o_out  </td>  
  </tr>
</table>

0,1
,
m7_h2o_in,m7_h2o_out


In [12]:
m_in = open('/home/gosia/work/work_on_gitlab/icho/calcs/m7-1h2o/m7_h2o_in.xyz', 'r').read()
p_in = py3Dmol.view(width=300,height=300)
p_in.addModel(m_in,'xyz')
p_in.setStyle({'stick':{'radius':'0.15'}})
p_in.setBackgroundColor('0xeeeeee')
p_in.zoomTo()
p_in.insert('m7_h2o_in')

In [14]:
m_out = open('/home/gosia/work/work_on_gitlab/icho/calcs/m7-1h2o/m7_h2o_out.xyz', 'r').read()
p_out = py3Dmol.view(width=300,height=300)
p_out.addModel(m_out,'xyz')
p_out.setStyle({'stick':{'radius':'0.15'}})
p_out.setBackgroundColor('0xeeeeee')
p_out.zoomTo()
p_out.insert('m7_h2o_out')

### Conformers generated with the Balloon software:

In both cases the Balloon software was asked to generate 100 conformers using the genertic algorithm with default settings (only "maxPostprocessIter" increased to 150 and "nGenerations" to 300).

Below we will present aligned structures of macrocycle M1 conformers in "M1 + H2O" (without showing H2O molecules).

In [17]:
inps_m7_h2o_in_b  = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m7-1h2o/balloon/results_starting_from_m7_h2o_in/*.sdf')
inps_m7_h2o_out_b = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m7-1h2o/balloon/results_starting_from_m7_h2o_out/*.sdf')

In [18]:
e_m7_h2o_in_b  = grep_energies_from_sdf_outputs(inps_m7_h2o_in_b)
e_m7_h2o_out_b = grep_energies_from_sdf_outputs(inps_m7_h2o_out_b)

In [19]:
%%html
<table>
  <tr>
    <td id="m7_h2o_in_b" ></td>
    <td id="m7_h2o_out_b"  ></td>
  <tr>
    <td> m7_h2o_in_b </td>
    <td> m7_h2o_out_b  </td>  
  </tr>
</table>

0,1
,
m7_h2o_in_b,m7_h2o_out_b


In [20]:
# write conformers to dictionaries
allmol_m7_h2o_in_b  = {}
allmol_m7_h2o_out_b = {}
suppl_m7_h2o_in_b  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m7-1h2o/balloon/m7_h2o_in_result.sdf')
suppl_m7_h2o_out_b = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m7-1h2o/balloon/m7_h2o_out_result.sdf')

for i, mol in enumerate(suppl_m7_h2o_in_b):
    name = "m7_h2o_in_b_" + str(i)
    allmol_m7_h2o_in_b[name] = mol
for i, mol in enumerate(suppl_m7_h2o_out_b):
    name = "m7_h2o_out_b_" + str(i)
    allmol_m7_h2o_out_b[name] = mol 

In [21]:
# align:
for key, mol in allmol_m7_h2o_in_b.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m7_crystal,atomMap=list(zip(core_mol,core_m7)))
for key, mol in allmol_m7_h2o_out_b.items():   
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m7_crystal,atomMap=list(zip(core_mol,core_m7)))    

In [22]:
# view:
p7_b_handles=[]

p7_b_in = py3Dmol.view(width=400,height=400)
for key, mol in allmol_m7_h2o_in_b.items():
    mb = Chem.MolToMolBlock(mol)
    p7_b_in.addModel(mb,'sdf')
p7_b_in.setStyle({'stick':{'radius':'0.15'}})
p7_b_in.setBackgroundColor('0xeeeeee')
p7_b_in.zoomTo()    
p7_b_handles.append(p7_b_in)

p7_b_out = py3Dmol.view(width=400,height=400)
for key, mol in allmol_m7_h2o_out_b.items():
    mb = Chem.MolToMolBlock(mol)
    p7_b_out.addModel(mb,'sdf')
p7_b_out.setStyle({'stick':{'radius':'0.15'}})
p7_b_out.setBackgroundColor('0xeeeeee')
p7_b_out.zoomTo()    
p7_b_handles.append(p7_b_out)

In [23]:
p7_b_handles[0].insert('m7_h2o_in_b')

In [24]:
p7_b_handles[1].insert('m7_h2o_out_b')

We will select all the conformers generated with the Balloon software for further refinement with DFT methods.

## Conformers generated with the RDKit software:

In [25]:
inps_m7_h2o_in_rdkit  = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m7-1h2o/rdkit/results_starting_from_m7_h2o_in/*.sdf')
inps_m7_h2o_out_rdkit = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m7-1h2o/rdkit/results_starting_from_m7_h2o_out/*.sdf')

In [26]:
e_m7_h2o_in_rdkit = grep_energies_from_sdf_outputs(inps_m7_h2o_in_rdkit)
e_m7_h2o_out_rdkit = grep_energies_from_sdf_outputs(inps_m7_h2o_out_rdkit)

In [27]:
# write conformers to dictionaries
allmol_m7_h2o_in_rdkit = {}
suppl_m7_h2o_in_rdkit  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m7-1h2o/rdkit/m7_h2o_in_result.sdf')
allmol_m7_h2o_out_rdkit = {}
suppl_m7_h2o_out_rdkit  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m7-1h2o/rdkit/m7_h2o_out_result.sdf')

for i, mol in enumerate(suppl_m7_h2o_in_rdkit):
    name = "m7_h2o_in_rdkit_" + str(i)
    allmol_m7_h2o_in_rdkit[name] = mol  
for i, mol in enumerate(suppl_m7_h2o_out_rdkit):
    name = "m7_h2o_out_rdkit_" + str(i)
    allmol_m7_h2o_out_rdkit[name] = mol     

In [28]:
# align:
for key, mol in allmol_m7_h2o_in_rdkit.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m7_crystal,atomMap=list(zip(core_mol,core_m7)))
for key, mol in allmol_m7_h2o_out_rdkit.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles)) 
    AllChem.AlignMol(mol,m7_crystal,atomMap=list(zip(core_mol,core_m7)))

In [29]:
%%html
<table>
  <tr>
    <td id="m7_h2o_in_rdkit" ></td>
    <td id="m7_h2o_out_rdkit"  ></td>
  <tr>
    <td> m7_h2o_in_rdkit </td>
    <td> m7_h2o_out_rdkit  </td>  
  </tr>
</table>

0,1
,
m7_h2o_in_rdkit,m7_h2o_out_rdkit


In [30]:
# view:
p7_h2o_rdkit_handles=[]

p7_h2o_in_rdkit = py3Dmol.view(width=400,height=400)
p7_h2o_in_rdkit.removeAllModels()
for key, mol in allmol_m7_h2o_in_rdkit.items(): 
    mb = Chem.MolToMolBlock(mol)
    p7_h2o_in_rdkit.addModel(mb,'sdf')    
p7_h2o_in_rdkit.setStyle({'stick':{'radius':'0.15'}})
p7_h2o_in_rdkit.setBackgroundColor('0xeeeeee')
p7_h2o_in_rdkit.zoomTo()
p7_h2o_rdkit_handles.append(p7_h2o_in_rdkit)

p7_h2o_out_rdkit = py3Dmol.view(width=400,height=400)
p7_h2o_out_rdkit.removeAllModels()
for key, mol in allmol_m7_h2o_out_rdkit.items(): 
    mb = Chem.MolToMolBlock(mol)
    p7_h2o_out_rdkit.addModel(mb,'sdf')    
p7_h2o_out_rdkit.setStyle({'stick':{'radius':'0.15'}})
p7_h2o_out_rdkit.setBackgroundColor('0xeeeeee')
p7_h2o_out_rdkit.zoomTo()
p7_h2o_rdkit_handles.append(p7_h2o_out_rdkit)

In [31]:
p7_h2o_rdkit_handles[0].insert('m7_h2o_in_rdkit')

In [32]:
p7_h2o_rdkit_handles[1].insert('m7_h2o_out_rdkit')

### pre-screening

In [33]:
allmol_m7_h2o_rdkit = {}
allmol_m7_h2o_rdkit.update(allmol_m7_h2o_in_rdkit)
allmol_m7_h2o_rdkit.update(allmol_m7_h2o_out_rdkit)

energy_m7_h2o_rdkit = {}
energy_m7_h2o_rdkit.update(e_m7_h2o_in_rdkit)
energy_m7_h2o_rdkit.update(e_m7_h2o_out_rdkit)

rms_m7_h2o_rdkit = {}
for key, mol in allmol_m7_h2o_rdkit.items():
    rms_m7_h2o_rdkit[key] = AllChem.GetBestRMS(Chem.RemoveHs(mol),Chem.RemoveHs(m7_crystal))
    #print("name = {}, E = {:.6f}, RMS = {:.6f}".format(key, energy_m1_rdkit[key], rms_m1_rdkit[key]))

In [34]:
rms_sorted = sorted(rms_m7_h2o_rdkit.items(), key=lambda x: x[1])
rms_thresh = 0.05

# now compare RMS of each pair, if the structures are too similar then delete the one with the higher energy
to_be_deleted = find_duplicates(rms_sorted, energy_m7_h2o_rdkit, rms_thresh)

for mol in to_be_deleted:
    del allmol_m7_h2o_rdkit[mol]
    del energy_m7_h2o_rdkit[mol]
    del rms_m7_h2o_rdkit[mol]  

Conformers which will be deleted:
['m7_h2o_out_rdkit_67', 'm7_h2o_in_rdkit_70', 'm7_h2o_out_rdkit_70', 'm7_h2o_in_rdkit_73', 'm7_h2o_out_rdkit_38', 'm7_h2o_in_rdkit_38', 'm7_h2o_in_rdkit_29', 'm7_h2o_out_rdkit_29', 'm7_h2o_in_rdkit_41', 'm7_h2o_out_rdkit_41', 'm7_h2o_in_rdkit_62', 'm7_h2o_in_rdkit_6', 'm7_h2o_out_rdkit_6', 'm7_h2o_in_rdkit_1', 'm7_h2o_out_rdkit_1', 'm7_h2o_in_rdkit_32', 'm7_h2o_out_rdkit_32', 'm7_h2o_in_rdkit_60', 'm7_h2o_out_rdkit_60', 'm7_h2o_out_rdkit_28', 'm7_h2o_in_rdkit_28', 'm7_h2o_in_rdkit_54', 'm7_h2o_out_rdkit_13', 'm7_h2o_in_rdkit_13', 'm7_h2o_in_rdkit_71', 'm7_h2o_out_rdkit_71', 'm7_h2o_out_rdkit_46', 'm7_h2o_in_rdkit_46', 'm7_h2o_out_rdkit_8', 'm7_h2o_in_rdkit_63', 'm7_h2o_out_rdkit_63', 'm7_h2o_in_rdkit_12', 'm7_h2o_out_rdkit_12', 'm7_h2o_in_rdkit_36', 'm7_h2o_out_rdkit_36', 'm7_h2o_in_rdkit_31', 'm7_h2o_out_rdkit_31', 'm7_h2o_in_rdkit_14', 'm7_h2o_out_rdkit_14', 'm7_h2o_out_rdkit_11', 'm7_h2o_in_rdkit_51', 'm7_h2o_out_rdkit_51', 'm7_h2o_in_rdkit_11', 'm7

Below we will align the selected conformers:

In [35]:
for key, mol in allmol_m7_h2o_rdkit.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m7_crystal,atomMap=list(zip(core_mol,core_m7)))
    
p_r = py3Dmol.view(width=400,height=400)
for key, mol in allmol_m7_h2o_rdkit.items():
    mb = Chem.MolToMolBlock(mol)
    p_r.addModel(mb,'sdf')
p_r.setStyle({'stick':{'radius':'0.15'}})
p_r.setBackgroundColor('0xeeeeee')
p_r.zoomTo()
p_r.show()

In [36]:
with open("/home/gosia/work/work_on_gitlab/icho/calcs/m7-1h2o/list_selected_conformers_from_rdkit", "w") as f:
    for key, mol in allmol_m7_h2o_rdkit.items():
        f.write(key+"\n")  

## Summary

Now let's generate a list of all conformers (from all programs used, as presented above). 

In this case we will not do further screening for duplicates.
We will take:

* all conformers generated with the Balloon software - the reason is that we cannot easily screen for duplicates since conformer generation and optimization options also change the position of H2O molecule and we could accidentally remove two similar M conformers for which the position of H2O molecule is very different. 

* selected (pre-screened for duplicates) conformers generated with the RDKit software - the reason is that the conformers generated with RDKit do not change much the position of H2O molecule (what we can see by playing a movie of all conformers with the Avogadro software) and that there are many more of them (as we can see from figures above)

Besides, we need to remember that the MM methods are not ideal for non-covalently bonded systems, therefore we will rather try to take more conformers and refine them by DFT methods.

In [27]:
allmol_m1_h2o = {}
allmol_m1_h2o.update(allmol_m1_h2o_b)
allmol_m1_h2o.update(allmol_m1_h2o_rdkit)

energy_m1_h2o = {}
energy_m1_h2o.update(energy_m1_h2o_b)
energy_m1_h2o.update(energy_m1_h2o_rdkit)

rms_m1_h2o = {}
for key, mol in allmol_m1_h2o.items():
    rms_m1_h2o[key] = AllChem.GetBestRMS(Chem.RemoveHs(mol),Chem.RemoveHs(m1_crystal))
    #print("name = {}, E = {:.6f}, RMS = {:.6f}".format(key, energy_m1[key], rms_m1[key]))

NameError: name 'allmol_m1_h2o_b' is not defined

In [36]:
rms_sorted = sorted(rms_m1_h2o.items(), key=lambda x: x[1])
rms_thresh = 0.1

# now compare RMS of each pair, if the structures are too similar then delete the one with the higher energy
to_be_deleted = find_duplicates(rms_sorted, energy_m1_h2o, rms_thresh)

for mol in to_be_deleted:
    del allmol_m1_h2o[mol]
    del energy_m1_h2o[mol]
    del rms_m1_h2o[mol]  

Conformers which will be deleted:
['m1_h2o_in_rdkit_19', 'm1_h2o_in_rdkit_34', 'm1_h2o_out_b_10', 'm1_h2o_out_rdkit_11', 'm1_h2o_in_rdkit_14', 'm1_h2o_in_rdkit_2', 'm1_h2o_out_rdkit_32', 'm1_h2o_in_rdkit_0']


Finally we can align all conformers which will further be used as starting points in DFT geometry optimizations:

In [37]:
print(len(allmol_m1_h2o))

for key, mol in allmol_m1_h2o.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))
    
p = py3Dmol.view(width=400,height=400)
for key, mol in allmol_m1_h2o.items():
    mb = Chem.MolToMolBlock(mol)
    p.addModel(mb,'sdf')
p.setStyle({'stick':{'radius':'0.15'}})
p.setBackgroundColor('0xeeeeee')
p.zoomTo()
p.show()

13


Write the selected conformers' names to the list "list_selected_conformers_from_balloon_rdkit". It will be used to generate Gaussian inputs:

In [38]:
with open("/home/gosia/work/work_on_gitlab/icho/calcs/m1-1h2o/list_selected_conformers_from_ballon_rdkit", "w") as f:
    for key, mol in allmol_m1_h2o.items():
        f.write(key+"\n")

energy_sorted = sorted(energy_m1_h2o.items(), key=lambda x: x[1])
print(energy_sorted)
with open("/home/gosia/work/work_on_gitlab/icho/calcs/m1-1h2o/detailed_list_selected_conformers_from_ballon_rdkit", "w") as f:
    for pair in energy_sorted:
        f.write("{0:30}   {1}\n".format(pair[0], pair[1]))          

[('m1_h2o_out_b_0', 51.21072384846502), ('m1_h2o_out_b_3', 52.05078819235895), ('m1_h2o_out_b_5', 52.1643431238253), ('m1_h2o_out_b_7', 52.46459240004303), ('m1_h2o_out_b_9', 52.959593601332905), ('m1_h2o_out_rdkit_41', 53.73), ('m1_h2o_out_b_12', 54.05849757183306), ('m1_h2o_out_b_14', 54.789313762111476), ('m1_h2o_out_rdkit_4', 55.09), ('m1_h2o_in_rdkit_11', 55.42), ('m1_h2o_out_rdkit_1', 55.66), ('m1_h2o_in_rdkit_26', 56.06), ('m1_h2o_out_b_17', 56.68367774518494)]
