# Exploring conformational space of "M1+nH2O" (n = 1, 2) complexes

In this notebook we present and analyze selected structures, technical notes are [here](www.gitlab.com/user/gosia/icho).

In [None]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [1]:
import glob
import py3Dmol

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem import rdMolAlign
from rdkit.Chem.Draw import IPythonConsole
from rdkit import rdBase
print(rdBase.rdkitVersion)
import os,time
print( time.asctime())

2016.09.4
Thu Apr 13 10:51:57 2017


In [2]:
# Functions used in this notebook:

def grep_energies_from_sdf_outputs(files):
    energies = {}
    for inp in files:
        with open(inp,'r') as f:
            lines = f.readlines()
            for i, line in enumerate(lines):
                if "M  END" in line:
                    energies[os.path.splitext(os.path.basename(inp))[0]] = float(lines[i+1])
    return energies

def find_duplicates(rms_sorted, energy, rms_thresh):
    i = 0
    to_be_deleted = []
    while i < len(rms_sorted):
        j = i + 1
        while j < len(rms_sorted):
            if rms_sorted[i][0] in to_be_deleted:
                i = i + 1
                j = j + 1
            elif rms_sorted[j][0] in to_be_deleted:
                j = j + 1
            else:
                rms1 = rms_sorted[i][1]
                rms2 = rms_sorted[j][1]
                if (rms2 - rms1) < rms_thresh:
                    if energy[rms_sorted[i][0]] < energy[rms_sorted[j][0]]:
                        to_be_deleted.append(rms_sorted[j][0])
                    else:
                        to_be_deleted.append(rms_sorted[i][0])
                else:
                    break
        i = i + 1
    if to_be_deleted:
        print("Conformers which will be deleted:")    
        print(to_be_deleted)
    return to_be_deleted

In [3]:
# useful for later:
# "core" is a part of a molecule, which we wish to be the "most-aligned" among multiple conformers
smiles      = 'O=C1NCCNC(=O)c2nc(C(=O)NCCNC(=O)c3nc1ccc3)ccc2'
core_smiles = 'n1ccccc1'

m1 = Chem.AddHs(Chem.MolFromSmiles(smiles))
core_m1 = m1.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))

templ_m1 = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1/balloon/m1_crystal.sdf')
m1_crystal = templ_m1[0]

## M1 + H2O

M1 + H2O structures were generated from the crystal geometry of isolated M1 macrocycle, to which we have added :

* 1 H2O molecule "inside" the macrocycle ("m1_h2o_in", left fig. below)

* 1 H2O molecule "outside" the macrocycle ("m1_h2o_out", right fig. below)

manually in Avogadro software; then pre-optimized with MM implemented in Avogadro.

On figures below we present already pre-optimized "m1 + h2o" complexes, which serve as starting geometries for exploration of the conformational space.

In [4]:
%%html
<table>
  <tr>
    <td id="m1_h2o_in" ></td>
    <td id="m1_h2o_out" ></td>
  <tr>
    <td> m1_h2o_in </td>
    <td> m1_h2o_out  </td>  
  </tr>
</table>

0,1
,
m1_h2o_in,m1_h2o_out


In [5]:
m_in = open('/home/gosia/work/work_on_gitlab/icho/calcs/m1-1h2o/m1_h2o_in.xyz', 'r').read()
p_in = py3Dmol.view(width=300,height=300)
p_in.addModel(m_in,'xyz')
p_in.setStyle({'stick':{'radius':'0.15'}})
p_in.setBackgroundColor('0xeeeeee')
p_in.zoomTo()
p_in.insert('m1_h2o_in')

In [6]:
m_out = open('/home/gosia/work/work_on_gitlab/icho/calcs/m1-1h2o/m1_h2o_out.xyz', 'r').read()
p_out = py3Dmol.view(width=300,height=300)
p_out.addModel(m_out,'xyz')
p_out.setStyle({'stick':{'radius':'0.15'}})
p_out.setBackgroundColor('0xeeeeee')
p_out.zoomTo()
p_out.insert('m1_h2o_out')

### Conformers generated with the Balloon software:

In both cases the Balloon software was asked to generate 100 conformers using the genertic algorithm with default settings (only "maxPostprocessIter" increased to 150 and "nGenerations" to 300).

Below we will present aligned structures of macrocycle M1 conformers in "M1 + H2O" (without showing H2O molecules).

In [7]:
inps_m1_h2o_in_b  = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1-1h2o/balloon/results_starting_from_m1_h2o_in/*.sdf')
inps_m1_h2o_out_b = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1-1h2o/balloon/results_starting_from_m1_h2o_out/*.sdf')

In [8]:
e_m1_h2o_in_b  = grep_energies_from_sdf_outputs(inps_m1_h2o_in_b)
e_m1_h2o_out_b = grep_energies_from_sdf_outputs(inps_m1_h2o_out_b)

In [9]:
%%html
<table>
  <tr>
    <td id="m1_h2o_in_b" ></td>
    <td id="m1_h2o_out_b"  ></td>
  <tr>
    <td> m1_h2o_in_b </td>
    <td> m1_h2o_out_b  </td>  
  </tr>
</table>

0,1
,
m1_h2o_in_b,m1_h2o_out_b


In [10]:
# write conformers to dictionaries
allmol_m1_h2o_in_b  = {}
allmol_m1_h2o_out_b = {}
suppl_m1_h2o_in_b  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1-1h2o/balloon/m1_h2o_in_result.sdf')
suppl_m1_h2o_out_b = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1-1h2o/balloon/m1_h2o_out_result.sdf')

for i, mol in enumerate(suppl_m1_h2o_in_b):
    name = "m1_h2o_in_b_" + str(i)
    allmol_m1_h2o_in_b[name] = mol
for i, mol in enumerate(suppl_m1_h2o_out_b):
    name = "m1_h2o_out_b_" + str(i)
    allmol_m1_h2o_out_b[name] = mol 

In [11]:
# align:
for key, mol in allmol_m1_h2o_in_b.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))
for key, mol in allmol_m1_h2o_out_b.items():   
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))    

In [12]:
# view:
p1_b_handles=[]

p1_b_in = py3Dmol.view(width=400,height=400)
for key, mol in allmol_m1_h2o_in_b.items():
    mb = Chem.MolToMolBlock(mol)
    p1_b_in.addModel(mb,'sdf')
p1_b_in.setStyle({'stick':{'radius':'0.15'}})
p1_b_in.setBackgroundColor('0xeeeeee')
p1_b_in.zoomTo()    
p1_b_handles.append(p1_b_in)

p1_b_out = py3Dmol.view(width=400,height=400)
for key, mol in allmol_m1_h2o_out_b.items():
    mb = Chem.MolToMolBlock(mol)
    p1_b_out.addModel(mb,'sdf')
p1_b_out.setStyle({'stick':{'radius':'0.15'}})
p1_b_out.setBackgroundColor('0xeeeeee')
p1_b_out.zoomTo()    
p1_b_handles.append(p1_b_out)

In [13]:
p1_b_handles[0].insert('m1_h2o_in_b')

In [14]:
p1_b_handles[1].insert('m1_h2o_out_b')

### pre-screening

Some of the generated conformers are very much alike. To remove potential duplicates which were not "caught" by the Balloon program, we can compare the energies (preoptimized with MM) and the RMSD calculated against a reference structure (here: the crystal structure of M1). It does not matter against which structure we are aligning the conformers, since we are interested in relative RMS between them.

First let's print the energies and RMS values:

In [15]:
allmol_m1_h2o_b = {}
allmol_m1_h2o_b.update(allmol_m1_h2o_in_b)
allmol_m1_h2o_b.update(allmol_m1_h2o_out_b)

energy_m1_h2o_b = {}
energy_m1_h2o_b.update(e_m1_h2o_in_b)
energy_m1_h2o_b.update(e_m1_h2o_out_b)

rms_m1_h2o_b = {}
for key, mol in allmol_m1_h2o_b.items():
    rms_m1_h2o_b[key] = AllChem.GetBestRMS(Chem.RemoveHs(mol),Chem.RemoveHs(m1_crystal))

Then we can introduce some thresholds, for instance:

* if two conformers differ by less than 0.01 in RMS (measured against the reference structure), then select the one with the lower energy

In [16]:
rms_sorted = sorted(rms_m1_h2o_b.items(), key=lambda x: x[1])
rms_thresh = 0.05

print("List sorted by RMS:")
for i, t in enumerate(rms_sorted):
    print("name = {}, E = {:.6f}, RMS = {:.6f}".format(rms_sorted[i][0], energy_m1_h2o_b[rms_sorted[i][0]], rms_sorted[i][1]))

# now compare RMS of each pair, if the structures are too similar then delete the one with the higher energy
to_be_deleted = find_duplicates(rms_sorted, energy_m1_h2o_b, rms_thresh)

for mol in to_be_deleted:
    del allmol_m1_h2o_b[mol]
    del energy_m1_h2o_b[mol]
    del rms_m1_h2o_b[mol]  

List sorted by RMS:
name = m1_h2o_out_b_16, E = 56.240346, RMS = 0.198288
name = m1_h2o_out_b_14, E = 54.789314, RMS = 0.223653
name = m1_h2o_in_b_15, E = 57.582827, RMS = 0.316532
name = m1_h2o_out_b_12, E = 54.058498, RMS = 0.331301
name = m1_h2o_in_b_9, E = 53.096084, RMS = 0.343899
name = m1_h2o_in_b_4, E = 52.014900, RMS = 0.427144
name = m1_h2o_out_b_9, E = 52.959594, RMS = 0.441097
name = m1_h2o_in_b_10, E = 53.446733, RMS = 0.463845
name = m1_h2o_in_b_6, E = 52.215628, RMS = 0.494019
name = m1_h2o_in_b_16, E = 58.387284, RMS = 0.520676
name = m1_h2o_out_b_15, E = 56.119637, RMS = 0.542196
name = m1_h2o_out_b_4, E = 52.087903, RMS = 0.564221
name = m1_h2o_out_b_0, E = 51.210724, RMS = 0.596499
name = m1_h2o_out_b_13, E = 54.225384, RMS = 0.597136
name = m1_h2o_out_b_2, E = 51.919702, RMS = 0.613838
name = m1_h2o_in_b_14, E = 55.832805, RMS = 0.626317
name = m1_h2o_out_b_1, E = 51.269769, RMS = 0.636244
name = m1_h2o_in_b_0, E = 51.092666, RMS = 0.665046
name = m1_h2o_in_b_2, E =

Below we will align the selected conformers:

In [17]:
for key, mol in allmol_m1_h2o_b.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))
    
p_b = py3Dmol.view(width=400,height=400)
for key, mol in allmol_m1_h2o_b.items():
    mb = Chem.MolToMolBlock(mol)
    p_b.addModel(mb,'sdf')
p_b.setStyle({'stick':{'radius':'0.15'}})
p_b.setBackgroundColor('0xeeeeee')
p_b.zoomTo()
p_b.show()

## Conformers generated with the RDKit software:

In [18]:
inps_m1_h2o_in_rdkit  = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1-1h2o/rdkit/results_starting_from_m1_h2o_in/*.sdf')
inps_m1_h2o_out_rdkit = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1-1h2o/rdkit/results_starting_from_m1_h2o_out/*.sdf')

In [19]:
e_m1_h2o_in_rdkit = grep_energies_from_sdf_outputs(inps_m1_h2o_in_rdkit)
e_m1_h2o_out_rdkit = grep_energies_from_sdf_outputs(inps_m1_h2o_out_rdkit)

In [20]:
# write conformers to dictionaries
allmol_m1_h2o_in_rdkit = {}
suppl_m1_h2o_in_rdkit  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1-1h2o/rdkit/m1_h2o_in_result.sdf')
allmol_m1_h2o_out_rdkit = {}
suppl_m1_h2o_out_rdkit  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1-1h2o/rdkit/m1_h2o_out_result.sdf')

for i, mol in enumerate(suppl_m1_h2o_in_rdkit):
    name = "m1_h2o_in_rdkit_" + str(i)
    allmol_m1_h2o_in_rdkit[name] = mol  
for i, mol in enumerate(suppl_m1_h2o_out_rdkit):
    name = "m1_h2o_out_rdkit_" + str(i)
    allmol_m1_h2o_out_rdkit[name] = mol     

In [21]:
# align:
for key, mol in allmol_m1_h2o_in_rdkit.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))
for key, mol in allmol_m1_h2o_out_rdkit.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles)) 
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))

In [22]:
%%html
<table>
  <tr>
    <td id="m1_h2o_in_rdkit" ></td>
    <td id="m1_h2o_out_rdkit"  ></td>
  <tr>
    <td> m1_h2o_in_rdkit </td>
    <td> m1_h2o_out_rdkit  </td>  
  </tr>
</table>

0,1
,
m1_h2o_in_rdkit,m1_h2o_out_rdkit


In [23]:
# view:
p1_h2o_rdkit_handles=[]

p1_h2o_in_rdkit = py3Dmol.view(width=400,height=400)
p1_h2o_in_rdkit.removeAllModels()
for key, mol in allmol_m1_h2o_in_rdkit.items(): 
    mb = Chem.MolToMolBlock(mol)
    p1_h2o_in_rdkit.addModel(mb,'sdf')    
p1_h2o_in_rdkit.setStyle({'stick':{'radius':'0.15'}})
p1_h2o_in_rdkit.setBackgroundColor('0xeeeeee')
p1_h2o_in_rdkit.zoomTo()
p1_h2o_rdkit_handles.append(p1_h2o_in_rdkit)

p1_h2o_out_rdkit = py3Dmol.view(width=400,height=400)
p1_h2o_out_rdkit.removeAllModels()
for key, mol in allmol_m1_h2o_out_rdkit.items(): 
    mb = Chem.MolToMolBlock(mol)
    p1_h2o_out_rdkit.addModel(mb,'sdf')    
p1_h2o_out_rdkit.setStyle({'stick':{'radius':'0.15'}})
p1_h2o_out_rdkit.setBackgroundColor('0xeeeeee')
p1_h2o_out_rdkit.zoomTo()
p1_h2o_rdkit_handles.append(p1_h2o_out_rdkit)

In [24]:
p1_h2o_rdkit_handles[0].insert('m1_h2o_in_rdkit')

In [25]:
p1_h2o_rdkit_handles[1].insert('m1_h2o_out_rdkit')

### pre-screening

In [26]:
allmol_m1_h2o_rdkit = {}
allmol_m1_h2o_rdkit.update(allmol_m1_h2o_in_rdkit)
allmol_m1_h2o_rdkit.update(allmol_m1_h2o_out_rdkit)

energy_m1_h2o_rdkit = {}
energy_m1_h2o_rdkit.update(e_m1_h2o_in_rdkit)
energy_m1_h2o_rdkit.update(e_m1_h2o_out_rdkit)

rms_m1_h2o_rdkit = {}
for key, mol in allmol_m1_h2o_rdkit.items():
    rms_m1_h2o_rdkit[key] = AllChem.GetBestRMS(Chem.RemoveHs(mol),Chem.RemoveHs(m1_crystal))
    #print("name = {}, E = {:.6f}, RMS = {:.6f}".format(key, energy_m1_rdkit[key], rms_m1_rdkit[key]))

In [27]:
rms_sorted = sorted(rms_m1_h2o_rdkit.items(), key=lambda x: x[1])
rms_thresh = 0.05

print("List sorted by RMS:")
for i, t in enumerate(rms_sorted):
    print("name = {}, E = {:.6f}, RMS = {:.6f}".format(rms_sorted[i][0], energy_m1_h2o_rdkit[rms_sorted[i][0]], rms_sorted[i][1]))

# now compare RMS of each pair, if the structures are too similar then delete the one with the higher energy
to_be_deleted = find_duplicates(rms_sorted, energy_m1_h2o_rdkit, rms_thresh)

for mol in to_be_deleted:
    del allmol_m1_h2o_rdkit[mol]
    del energy_m1_h2o_rdkit[mol]
    del rms_m1_h2o_rdkit[mol]  

List sorted by RMS:
name = m1_h2o_in_rdkit_19, E = 53.960000, RMS = 0.823541
name = m1_h2o_in_rdkit_6, E = 62.850000, RMS = 1.156342
name = m1_h2o_out_rdkit_6, E = 62.850000, RMS = 1.156346
name = m1_h2o_in_rdkit_23, E = 62.850000, RMS = 1.156768
name = m1_h2o_out_rdkit_23, E = 62.850000, RMS = 1.156768
name = m1_h2o_in_rdkit_34, E = 59.990000, RMS = 1.192053
name = m1_h2o_out_rdkit_8, E = 65.560000, RMS = 1.241698
name = m1_h2o_in_rdkit_31, E = 60.250000, RMS = 1.256433
name = m1_h2o_out_rdkit_31, E = 60.250000, RMS = 1.256435
name = m1_h2o_out_rdkit_47, E = 57.260000, RMS = 1.280461
name = m1_h2o_in_rdkit_48, E = 57.260000, RMS = 1.280482
name = m1_h2o_out_rdkit_33, E = 62.220000, RMS = 1.282967
name = m1_h2o_in_rdkit_33, E = 62.220000, RMS = 1.282968
name = m1_h2o_in_rdkit_24, E = 62.220000, RMS = 1.282993
name = m1_h2o_in_rdkit_45, E = 62.220000, RMS = 1.282994
name = m1_h2o_out_rdkit_24, E = 62.220000, RMS = 1.282998
name = m1_h2o_out_rdkit_11, E = 55.420000, RMS = 1.298907
name =

Below we will align the selected conformers:

In [28]:
for key, mol in allmol_m1_h2o_rdkit.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))
    
p_r = py3Dmol.view(width=400,height=400)
for key, mol in allmol_m1_h2o_rdkit.items():
    mb = Chem.MolToMolBlock(mol)
    p_r.addModel(mb,'sdf')
p_r.setStyle({'stick':{'radius':'0.15'}})
p_r.setBackgroundColor('0xeeeeee')
p_r.zoomTo()
p_r.show()

## Summary

Now let's generate a list of all conformers (from all programs used, as presented above). We can further pre-screen all the structures and remove potential duplicates. Here we can also use more crude RMSD threshold.

In [29]:
allmol_m1_h2o = {}
allmol_m1_h2o.update(allmol_m1_h2o_b)
allmol_m1_h2o.update(allmol_m1_h2o_rdkit)

energy_m1_h2o = {}
energy_m1_h2o.update(energy_m1_h2o_b)
energy_m1_h2o.update(energy_m1_h2o_rdkit)

rms_m1_h2o = {}
for key, mol in allmol_m1_h2o.items():
    rms_m1_h2o[key] = AllChem.GetBestRMS(Chem.RemoveHs(mol),Chem.RemoveHs(m1_crystal))
    #print("name = {}, E = {:.6f}, RMS = {:.6f}".format(key, energy_m1[key], rms_m1[key]))

In [30]:
rms_sorted = sorted(rms_m1_h2o.items(), key=lambda x: x[1])
rms_thresh = 0.1

# now compare RMS of each pair, if the structures are too similar then delete the one with the higher energy
to_be_deleted = find_duplicates(rms_sorted, energy_m1_h2o, rms_thresh)

for mol in to_be_deleted:
    del allmol_m1_h2o[mol]
    del energy_m1_h2o[mol]
    del rms_m1_h2o[mol]  

Conformers which will be deleted:
['m1_h2o_in_b_9', 'm1_h2o_in_b_6', 'm1_h2o_out_b_0', 'm1_h2o_out_b_3', 'm1_h2o_in_rdkit_19', 'm1_h2o_in_b_8', 'm1_h2o_in_rdkit_34', 'm1_h2o_out_b_7', 'm1_h2o_out_rdkit_11', 'm1_h2o_in_rdkit_11', 'm1_h2o_in_rdkit_14', 'm1_h2o_in_rdkit_2', 'm1_h2o_out_rdkit_32', 'm1_h2o_in_rdkit_0']


Finally we can align all conformers which will further be used as starting points in DFT geometry optimizations:

In [31]:
print(len(allmol_m1_h2o))

for key, mol in allmol_m1_h2o.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))
    
p = py3Dmol.view(width=400,height=400)
for key, mol in allmol_m1_h2o.items():
    mb = Chem.MolToMolBlock(mol)
    p.addModel(mb,'sdf')
p.setStyle({'stick':{'radius':'0.15'}})
p.setBackgroundColor('0xeeeeee')
p.zoomTo()
p.show()

11


Write the selected conformers' names to the list "list_selected_conformers_from_balloon_rdkit". It will be used to generate Gaussian inputs:

In [32]:
with open("/home/gosia/work/work_on_gitlab/icho/calcs/m1-1h2o/list_selected_conformers_from_ballon_rdkit", "w") as f:
    for key, mol in allmol_m1_h2o.items():
        f.write(key+"\n")      

## M1 + 2H2O

M1 + 2H2O structures were generated from the crystal geometry of isolated M1 macrocycle, to which we have added :

* 2 H2O molecules "inside" the macrocycle ("m1_2h2o_in", first from the left fig. below)

* 2 H2O molecules "outside" the macrocycle ("m1_2h2o_out", second from the left fig. below)

* 1 H2O molecule "inside" and 1 H2O molecule "outside" the macrocycle ("m1_2h2o_inout", third from the left fig. below)

manually in Avogadro software; then pre-optimized with MM implemented in Avogadro.

On figures below we present already pre-optimized "m1+2h2o" complexes, which serve as starting geometries for exploration of the conformational space.

In [33]:
%%html
<table>
  <tr>
    <td id="m1_2h2o_in" ></td>
    <td id="m1_2h2o_out" ></td>
    <td id="m1_2h2o_inout" ></td>    
  <tr>
    <td> m1_2h2o_in </td>
    <td> m1_2h2o_out  </td>  
    <td> m1_2h2o_inout  </td>     
  </tr>
</table>

0,1,2
,,
m1_2h2o_in,m1_2h2o_out,m1_2h2o_inout


In [34]:
m_in = open('/home/gosia/work/work_on_gitlab/icho/calcs/m1-2h2o/m1_2h2o_in.xyz', 'r').read()
p_in = py3Dmol.view(width=300,height=300)
p_in.addModel(m_in,'xyz')
p_in.setStyle({'stick':{'radius':'0.15'}})
p_in.setBackgroundColor('0xeeeeee')
p_in.zoomTo()
p_in.insert('m1_2h2o_in')

In [35]:
m_out = open('/home/gosia/work/work_on_gitlab/icho/calcs/m1-2h2o/m1_2h2o_out.xyz', 'r').read()
p_out = py3Dmol.view(width=300,height=300)
p_out.addModel(m_out,'xyz')
p_out.setStyle({'stick':{'radius':'0.15'}})
p_out.setBackgroundColor('0xeeeeee')
p_out.zoomTo()
p_out.insert('m1_2h2o_out')

In [36]:
m_inout = open('/home/gosia/work/work_on_gitlab/icho/calcs/m1-2h2o/m1_2h2o_inout.xyz', 'r').read()
p_inout = py3Dmol.view(width=300,height=300)
p_inout.addModel(m_inout,'xyz')
p_inout.setStyle({'stick':{'radius':'0.15'}})
p_inout.setBackgroundColor('0xeeeeee')
p_inout.zoomTo()
p_inout.insert('m1_2h2o_inout')

### Conformers generated with the Balloon software:

In both cases the Balloon software was asked to generate 100 conformers using the genertic algorithm with default settings (only "maxPostprocessIter" increased to 150 and "nGenerations" to 300).

Below we will present aligned structures of macrocycle M1 conformers in "M1 + H2O" (without showing H2O molecules).

In [37]:
inps_m1_2h2o_in_b  = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1-2h2o/balloon/results_starting_from_m1_2h2o_in/*.sdf')
inps_m1_2h2o_out_b = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1-2h2o/balloon/results_starting_from_m1_2h2o_out/*.sdf')
inps_m1_2h2o_inout_b = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1-2h2o/balloon/results_starting_from_m1_2h2o_inout/*.sdf')

In [38]:
e_m1_2h2o_in_b  = grep_energies_from_sdf_outputs(inps_m1_2h2o_in_b)
e_m1_2h2o_out_b = grep_energies_from_sdf_outputs(inps_m1_2h2o_out_b)
e_m1_2h2o_inout_b = grep_energies_from_sdf_outputs(inps_m1_2h2o_inout_b)

In [46]:
%%html
<table>
  <tr>
    <td id="m1_2h2o_in_b" ></td>
    <td id="m1_2h2o_out_b"  ></td>
    <td id="m1_2h2o_inout_b"  ></td>    
  <tr>
    <td> m1_2h2o_in_b </td>
    <td> m1_2h2o_out_b  </td>  
    <td> m1_2h2o_inout_b  </td>     
  </tr>
</table>

0,1,2
,,
m1_2h2o_in_b,m1_2h2o_out_b,m1_2h2o_inout_b


In [40]:
# write conformers to dictionaries
allmol_m1_2h2o_in_b  = {}
allmol_m1_2h2o_out_b = {}
allmol_m1_2h2o_inout_b = {}
suppl_m1_2h2o_in_b  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1-2h2o/balloon/m1_2h2o_in_result.sdf')
suppl_m1_2h2o_out_b = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1-2h2o/balloon/m1_2h2o_out_result.sdf')
suppl_m1_2h2o_inout_b = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1-2h2o/balloon/m1_2h2o_inout_result.sdf')

for i, mol in enumerate(suppl_m1_2h2o_in_b):
    name = "m1_2h2o_in_b_" + str(i)
    allmol_m1_2h2o_in_b[name] = mol
for i, mol in enumerate(suppl_m1_2h2o_out_b):
    name = "m1_2h2o_out_b_" + str(i)
    allmol_m1_2h2o_out_b[name] = mol 
for i, mol in enumerate(suppl_m1_2h2o_inout_b):
    name = "m1_2h2o_inout_b_" + str(i)
    allmol_m1_2h2o_inout_b[name] = mol     

In [41]:
# align:
for key, mol in allmol_m1_2h2o_in_b.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))
for key, mol in allmol_m1_2h2o_out_b.items():   
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))   
for key, mol in allmol_m1_2h2o_inout_b.items():   
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))      

In [47]:
# view:
p1_b_handles=[]

p1_b_in = py3Dmol.view(width=300,height=300)
for key, mol in allmol_m1_2h2o_in_b.items():
    mb = Chem.MolToMolBlock(mol)
    p1_b_in.addModel(mb,'sdf')
p1_b_in.setStyle({'stick':{'radius':'0.15'}})
p1_b_in.setBackgroundColor('0xeeeeee')
p1_b_in.zoomTo()    
p1_b_handles.append(p1_b_in)

p1_b_out = py3Dmol.view(width=300,height=300)
for key, mol in allmol_m1_2h2o_out_b.items():
    mb = Chem.MolToMolBlock(mol)
    p1_b_out.addModel(mb,'sdf')
p1_b_out.setStyle({'stick':{'radius':'0.15'}})
p1_b_out.setBackgroundColor('0xeeeeee')
p1_b_out.zoomTo()    
p1_b_handles.append(p1_b_out)

p1_b_inout = py3Dmol.view(width=300,height=300)
for key, mol in allmol_m1_2h2o_inout_b.items():
    mb = Chem.MolToMolBlock(mol)
    p1_b_inout.addModel(mb,'sdf')
p1_b_inout.setStyle({'stick':{'radius':'0.15'}})
p1_b_inout.setBackgroundColor('0xeeeeee')
p1_b_inout.zoomTo()    
p1_b_handles.append(p1_b_inout)

In [48]:
p1_b_handles[0].insert('m1_2h2o_in_b')

In [49]:
p1_b_handles[1].insert('m1_2h2o_out_b')

In [50]:
p1_b_handles[2].insert('m1_2h2o_inout_b')


### pre-screening

Some of the generated conformers are very much alike. To remove potential duplicates which were not "caught" by the Balloon program, we can compare the energies (preoptimized with MM) and the RMSD calculated against a reference structure (here: the crystal structure of M1). It does not matter against which structure we are aligning the conformers, since we are interested in relative RMS between them.

First let's print the energies and RMS values:


In [51]:
allmol_m1_2h2o_b = {}
allmol_m1_2h2o_b.update(allmol_m1_2h2o_in_b)
allmol_m1_2h2o_b.update(allmol_m1_2h2o_out_b)

energy_m1_2h2o_b = {}
energy_m1_2h2o_b.update(e_m1_2h2o_in_b)
energy_m1_2h2o_b.update(e_m1_2h2o_out_b)

rms_m1_2h2o_b = {}
for key, mol in allmol_m1_2h2o_b.items():
    rms_m1_2h2o_b[key] = AllChem.GetBestRMS(Chem.RemoveHs(mol),Chem.RemoveHs(m1_crystal))

In [52]:
rms_sorted = sorted(rms_m1_2h2o_b.items(), key=lambda x: x[1])
rms_thresh = 0.05

print("List sorted by RMS:")
for i, t in enumerate(rms_sorted):
    print("name = {}, E = {:.6f}, RMS = {:.6f}".format(rms_sorted[i][0], energy_m1_2h2o_b[rms_sorted[i][0]], rms_sorted[i][1]))

# now compare RMS of each pair, if the structures are too similar then delete the one with the higher energy
to_be_deleted = find_duplicates(rms_sorted, energy_m1_2h2o_b, rms_thresh)

for mol in to_be_deleted:
    del allmol_m1_2h2o_b[mol]
    del energy_m1_2h2o_b[mol]
    del rms_m1_2h2o_b[mol]  

List sorted by RMS:
name = m1_2h2o_in_b_29, E = 54.890389, RMS = 0.226267
name = m1_2h2o_out_b_13, E = 54.689126, RMS = 0.234887
name = m1_2h2o_in_b_30, E = 55.355135, RMS = 0.240480
name = m1_2h2o_in_b_28, E = 54.757403, RMS = 0.259561
name = m1_2h2o_out_b_11, E = 53.750620, RMS = 0.306094
name = m1_2h2o_out_b_14, E = 56.054597, RMS = 0.346971
name = m1_2h2o_in_b_27, E = 54.056001, RMS = 0.354770
name = m1_2h2o_in_b_15, E = 51.625895, RMS = 0.407191
name = m1_2h2o_in_b_11, E = 51.001782, RMS = 0.436906
name = m1_2h2o_in_b_5, E = 50.805264, RMS = 0.454215
name = m1_2h2o_out_b_9, E = 51.992328, RMS = 0.467310
name = m1_2h2o_in_b_23, E = 52.184899, RMS = 0.471831
name = m1_2h2o_out_b_2, E = 50.811418, RMS = 0.482438
name = m1_2h2o_out_b_5, E = 50.923510, RMS = 0.486285
name = m1_2h2o_in_b_3, E = 50.699467, RMS = 0.488799
name = m1_2h2o_in_b_22, E = 52.036037, RMS = 0.490778
name = m1_2h2o_in_b_4, E = 50.783727, RMS = 0.504533
name = m1_2h2o_in_b_2, E = 50.612542, RMS = 0.507738
name = m1

In [53]:
for key, mol in allmol_m1_2h2o_b.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))
    
p_b = py3Dmol.view(width=400,height=400)
for key, mol in allmol_m1_2h2o_b.items():
    mb = Chem.MolToMolBlock(mol)
    p_b.addModel(mb,'sdf')
p_b.setStyle({'stick':{'radius':'0.15'}})
p_b.setBackgroundColor('0xeeeeee')
p_b.zoomTo()
p_b.show()

### Conformers generated with the RDKit software:

In [54]:
inps_m1_2h2o_in_rdkit  = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1-2h2o/rdkit/results_starting_from_m1_2h2o_in/*.sdf')
inps_m1_2h2o_out_rdkit = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1-2h2o/rdkit/results_starting_from_m1_2h2o_out/*.sdf')
inps_m1_2h2o_inout_rdkit = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1-2h2o/rdkit/results_starting_from_m1_2h2o_inout/*.sdf')

In [55]:
e_m1_2h2o_in_rdkit = grep_energies_from_sdf_outputs(inps_m1_2h2o_in_rdkit)
e_m1_2h2o_out_rdkit = grep_energies_from_sdf_outputs(inps_m1_2h2o_out_rdkit)
e_m1_2h2o_inout_rdkit = grep_energies_from_sdf_outputs(inps_m1_2h2o_inout_rdkit)

In [56]:
# write conformers to dictionaries
allmol_m1_2h2o_in_rdkit = {}
suppl_m1_2h2o_in_rdkit  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1-2h2o/rdkit/m1_2h2o_in_result.sdf')
allmol_m1_2h2o_out_rdkit = {}
suppl_m1_2h2o_out_rdkit  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1-2h2o/rdkit/m1_2h2o_out_result.sdf')
allmol_m1_2h2o_inout_rdkit = {}
suppl_m1_2h2o_inout_rdkit  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1-2h2o/rdkit/m1_2h2o_inout_result.sdf')

for i, mol in enumerate(suppl_m1_2h2o_in_rdkit):
    name = "m1_2h2o_in_rdkit_" + str(i)
    allmol_m1_2h2o_in_rdkit[name] = mol  
for i, mol in enumerate(suppl_m1_2h2o_out_rdkit):
    name = "m1_2h2o_out_rdkit_" + str(i)
    allmol_m1_2h2o_out_rdkit[name] = mol  
for i, mol in enumerate(suppl_m1_2h2o_inout_rdkit):
    name = "m1_2h2o_inout_rdkit_" + str(i)
    allmol_m1_2h2o_inout_rdkit[name] = mol    

In [57]:
# align:
for key, mol in allmol_m1_2h2o_in_rdkit.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))
for key, mol in allmol_m1_2h2o_out_rdkit.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles)) 
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))
for key, mol in allmol_m1_2h2o_inout_rdkit.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles)) 
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))    

In [58]:
%%html
<table>
  <tr>
    <td id="m1_2h2o_in_rdkit" ></td>
    <td id="m1_2h2o_out_rdkit"  ></td>
    <td id="m1_2h2o_inout_rdkit"  ></td>    
  <tr>
    <td> m1_2h2o_in_rdkit </td>
    <td> m1_2h2o_out_rdkit  </td>  
    <td> m1_2h2o_niout_rdkit  </td>    
  </tr>
</table>

0,1,2
,,
m1_2h2o_in_rdkit,m1_2h2o_out_rdkit,m1_2h2o_niout_rdkit


In [59]:
# view:
p1_2h2o_rdkit_handles=[]

p1_2h2o_in_rdkit = py3Dmol.view(width=300,height=300)
p1_2h2o_in_rdkit.removeAllModels()
for key, mol in allmol_m1_2h2o_in_rdkit.items(): 
    mb = Chem.MolToMolBlock(mol)
    p1_2h2o_in_rdkit.addModel(mb,'sdf')    
p1_2h2o_in_rdkit.setStyle({'stick':{'radius':'0.15'}})
p1_2h2o_in_rdkit.setBackgroundColor('0xeeeeee')
p1_2h2o_in_rdkit.zoomTo()
p1_2h2o_rdkit_handles.append(p1_2h2o_in_rdkit)

p1_2h2o_out_rdkit = py3Dmol.view(width=300,height=300)
p1_2h2o_out_rdkit.removeAllModels()
for key, mol in allmol_m1_2h2o_out_rdkit.items(): 
    mb = Chem.MolToMolBlock(mol)
    p1_2h2o_out_rdkit.addModel(mb,'sdf')    
p1_2h2o_out_rdkit.setStyle({'stick':{'radius':'0.15'}})
p1_2h2o_out_rdkit.setBackgroundColor('0xeeeeee')
p1_2h2o_out_rdkit.zoomTo()
p1_2h2o_rdkit_handles.append(p1_2h2o_out_rdkit)

p1_2h2o_inout_rdkit = py3Dmol.view(width=300,height=300)
p1_2h2o_inout_rdkit.removeAllModels()
for key, mol in allmol_m1_2h2o_inout_rdkit.items(): 
    mb = Chem.MolToMolBlock(mol)
    p1_2h2o_inout_rdkit.addModel(mb,'sdf')    
p1_2h2o_inout_rdkit.setStyle({'stick':{'radius':'0.15'}})
p1_2h2o_inout_rdkit.setBackgroundColor('0xeeeeee')
p1_2h2o_inout_rdkit.zoomTo()
p1_2h2o_rdkit_handles.append(p1_2h2o_inout_rdkit)

In [60]:
p1_2h2o_rdkit_handles[0].insert('m1_2h2o_in_rdkit')

In [61]:
p1_2h2o_rdkit_handles[1].insert('m1_2h2o_out_rdkit')

In [63]:
p1_2h2o_rdkit_handles[2].insert('m1_2h2o_inout_rdkit')

### pre-screening

In [64]:
allmol_m1_2h2o_rdkit = {}
allmol_m1_2h2o_rdkit.update(allmol_m1_2h2o_in_rdkit)
allmol_m1_2h2o_rdkit.update(allmol_m1_2h2o_out_rdkit)
allmol_m1_2h2o_rdkit.update(allmol_m1_2h2o_inout_rdkit)

energy_m1_2h2o_rdkit = {}
energy_m1_2h2o_rdkit.update(e_m1_2h2o_in_rdkit)
energy_m1_2h2o_rdkit.update(e_m1_2h2o_out_rdkit)
energy_m1_2h2o_rdkit.update(e_m1_2h2o_inout_rdkit)

rms_m1_2h2o_rdkit = {}
for key, mol in allmol_m1_2h2o_rdkit.items():
    rms_m1_2h2o_rdkit[key] = AllChem.GetBestRMS(Chem.RemoveHs(mol),Chem.RemoveHs(m1_crystal))
    #print("name = {}, E = {:.6f}, RMS = {:.6f}".format(key, energy_m1_rdkit[key], rms_m1_rdkit[key]))

In [65]:
rms_sorted = sorted(rms_m1_2h2o_rdkit.items(), key=lambda x: x[1])
rms_thresh = 0.05

print("List sorted by RMS:")
for i, t in enumerate(rms_sorted):
    print("name = {}, E = {:.6f}, RMS = {:.6f}".format(rms_sorted[i][0], energy_m1_2h2o_rdkit[rms_sorted[i][0]], rms_sorted[i][1]))

# now compare RMS of each pair, if the structures are too similar then delete the one with the higher energy
to_be_deleted = find_duplicates(rms_sorted, energy_m1_2h2o_rdkit, rms_thresh)

for mol in to_be_deleted:
    del allmol_m1_2h2o_rdkit[mol]
    del energy_m1_2h2o_rdkit[mol]
    del rms_m1_2h2o_rdkit[mol] 

List sorted by RMS:
name = m1_2h2o_out_rdkit_38, E = 59.440000, RMS = 1.014580
name = m1_2h2o_in_rdkit_38, E = 59.440000, RMS = 1.014625
name = m1_2h2o_inout_rdkit_38, E = 59.440000, RMS = 1.014625
name = m1_2h2o_out_rdkit_6, E = 62.850000, RMS = 1.156328
name = m1_2h2o_in_rdkit_6, E = 62.850000, RMS = 1.156332
name = m1_2h2o_inout_rdkit_6, E = 62.850000, RMS = 1.156332
name = m1_2h2o_in_rdkit_35, E = 62.850000, RMS = 1.156345
name = m1_2h2o_inout_rdkit_35, E = 62.850000, RMS = 1.156345
name = m1_2h2o_out_rdkit_35, E = 62.850000, RMS = 1.156795
name = m1_2h2o_out_rdkit_12, E = 57.260000, RMS = 1.227433
name = m1_2h2o_in_rdkit_29, E = 60.250000, RMS = 1.256444
name = m1_2h2o_inout_rdkit_29, E = 60.250000, RMS = 1.256446
name = m1_2h2o_inout_rdkit_54, E = 56.020000, RMS = 1.276147
name = m1_2h2o_out_rdkit_53, E = 56.020000, RMS = 1.276147
name = m1_2h2o_in_rdkit_54, E = 56.020000, RMS = 1.276147
name = m1_2h2o_inout_rdkit_47, E = 57.260000, RMS = 1.280484
name = m1_2h2o_in_rdkit_47, E = 

In [66]:
for key, mol in allmol_m1_2h2o_rdkit.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))
    
p_r = py3Dmol.view(width=400,height=400)
for key, mol in allmol_m1_2h2o_rdkit.items():
    mb = Chem.MolToMolBlock(mol)
    p_r.addModel(mb,'sdf')
p_r.setStyle({'stick':{'radius':'0.15'}})
p_r.setBackgroundColor('0xeeeeee')
p_r.zoomTo()
p_r.show()

### Summary

In [67]:
allmol_m1_2h2o = {}
allmol_m1_2h2o.update(allmol_m1_2h2o_b)
allmol_m1_2h2o.update(allmol_m1_2h2o_rdkit)

energy_m1_2h2o = {}
energy_m1_2h2o.update(energy_m1_2h2o_b)
energy_m1_2h2o.update(energy_m1_2h2o_rdkit)

rms_m1_2h2o = {}
for key, mol in allmol_m1_2h2o.items():
    rms_m1_2h2o[key] = AllChem.GetBestRMS(Chem.RemoveHs(mol),Chem.RemoveHs(m1_crystal))
    #print("name = {}, E = {:.6f}, RMS = {:.6f}".format(key, energy_m1[key], rms_m1[key]))

In [68]:
rms_sorted = sorted(rms_m1_2h2o.items(), key=lambda x: x[1])
rms_thresh = 0.1

# now compare RMS of each pair, if the structures are too similar then delete the one with the higher energy
to_be_deleted = find_duplicates(rms_sorted, energy_m1_2h2o, rms_thresh)

for mol in to_be_deleted:
    del allmol_m1_2h2o[mol]
    del energy_m1_2h2o[mol]
    del rms_m1_2h2o[mol]  

Conformers which will be deleted:
['m1_2h2o_out_b_13', 'm1_2h2o_in_b_3', 'm1_2h2o_in_b_2', 'm1_2h2o_in_b_6', 'm1_2h2o_in_b_7', 'm1_2h2o_out_b_3', 'm1_2h2o_in_b_17', 'm1_2h2o_inout_rdkit_38', 'm1_2h2o_out_b_10', 'm1_2h2o_out_rdkit_35', 'm1_2h2o_inout_rdkit_11', 'm1_2h2o_out_rdkit_11', 'm1_2h2o_out_rdkit_3', 'm1_2h2o_inout_rdkit_41', 'm1_2h2o_in_rdkit_12', 'm1_2h2o_out_rdkit_1', 'm1_2h2o_out_rdkit_13', 'm1_2h2o_out_rdkit_21', 'm1_2h2o_inout_rdkit_0']


In [69]:
print(len(allmol_m1_2h2o))

for key, mol in allmol_m1_2h2o.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m1_crystal,atomMap=list(zip(core_mol,core_m1)))
    
p = py3Dmol.view(width=400,height=400)
for key, mol in allmol_m1_2h2o.items():
    mb = Chem.MolToMolBlock(mol)
    p.addModel(mb,'sdf')
p.setStyle({'stick':{'radius':'0.15'}})
p.setBackgroundColor('0xeeeeee')
p.zoomTo()
p.show()

10
