# Exploring conformational space of "M13+2H2O" complexes

In [None]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [1]:
import glob
import py3Dmol

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem import rdMolAlign
from rdkit.Chem.Draw import IPythonConsole
from rdkit import rdBase
print(rdBase.rdkitVersion)
import os,time
print( time.asctime())

2016.09.4
Fri Apr 28 09:29:06 2017


In [2]:
# Functions used in this notebook:

def grep_energies_from_sdf_outputs(files):
    energies = {}
    for inp in files:
        with open(inp,'r') as f:
            lines = f.readlines()
            for i, line in enumerate(lines):
                if "M  END" in line:
                    energies[os.path.splitext(os.path.basename(inp))[0]] = float(lines[i+1])
    return energies

def find_duplicates(rms_sorted, energy, rms_thresh):
    i = 0
    to_be_deleted = []
    while i < len(rms_sorted):
        j = i + 1
        while j < len(rms_sorted):
            if rms_sorted[i][0] in to_be_deleted:
                i = i + 1
                j = j + 1
            elif rms_sorted[j][0] in to_be_deleted:
                j = j + 1
            else:
                rms1 = rms_sorted[i][1]
                rms2 = rms_sorted[j][1]
                if (rms2 - rms1) < rms_thresh:
                    if energy[rms_sorted[i][0]] < energy[rms_sorted[j][0]]:
                        to_be_deleted.append(rms_sorted[j][0])
                    else:
                        to_be_deleted.append(rms_sorted[i][0])
                else:
                    break
        i = i + 1
    if to_be_deleted:
        print("Conformers which will be deleted:")    
        print(to_be_deleted)
    return to_be_deleted

def get_moldict_from_inplist(inplist):
    moldict = {}
    for i, inp in enumerate(inplist):
        mol = Chem.MolFromPDBFile(inp)
        name = os.path.basename(os.path.splitext(inp)[0]).rsplit("_", 0)[0]
        model = "_".join(os.path.basename(os.path.splitext(inp)[0]).rsplit("_", 2)[1:])
        moldict[name] = mol
    return moldict

In [33]:
# useful for later:
# "core" is a part of a molecule, which we wish to be the "most-aligned" among multiple conformers
smiles      = 'N1C(=O)c2cc(C(=O)NCCCCCNC(=O)c3cc(C(=O)NCCCCC1)cc(c3)C(C)(C)C)cc(c2)C(C)(C)C'
core_smiles = 'c1ccccc1'

m13 = Chem.AddHs(Chem.MolFromSmiles(smiles))
core_m13 = m13.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))

templ_m13 = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m13/m13_crystal.sdf')
m13_crystal = templ_m13[0]



## M13 + 2H2O

M13 + 2H2O structures were generated from the crystal geometry of isolated M13 macrocycle, to which we have added :

* 2 H2O molecules "inside" the macrocycle ("m13_2h2o_in", first from the left fig. below)

* 2 H2O molecules "outside" the macrocycle ("m13_2h2o_out", second from the left fig. below)

* 1 H2O molecule "inside" and 1 H2O molecule "outside" the macrocycle ("m13_2h2o_inout", third from the left fig. below)

manually in Avogadro software; then pre-optimized with MM implemented in Avogadro.

On figures below we present already pre-optimized "m13+2h2o" complexes, which serve as starting geometries for exploration of the conformational space.

In [4]:
%%html
<table>
  <tr>
    <td id="m13_2h2o_in" ></td>
    <td id="m13_2h2o_out" ></td>
    <td id="m13_2h2o_inout" ></td>    
  <tr>
    <td> m13_2h2o_in </td>
    <td> m13_2h2o_out  </td>  
    <td> m13_2h2o_inout  </td>     
  </tr>
</table>

0,1,2
,,
m13_2h2o_in,m13_2h2o_out,m13_2h2o_inout


In [5]:
m_in = open('/home/gosia/work/work_on_gitlab/icho/calcs/m13-2h2o/m13_2h2o_in.xyz', 'r').read()
p_in = py3Dmol.view(width=300,height=300)
p_in.addModel(m_in,'xyz')
p_in.setStyle({'stick':{'radius':'0.15'}})
p_in.setBackgroundColor('0xeeeeee')
p_in.zoomTo()
p_in.insert('m13_2h2o_in')

In [6]:
m_out = open('/home/gosia/work/work_on_gitlab/icho/calcs/m13-2h2o/m13_2h2o_out.xyz', 'r').read()
p_out = py3Dmol.view(width=300,height=300)
p_out.addModel(m_out,'xyz')
p_out.setStyle({'stick':{'radius':'0.15'}})
p_out.setBackgroundColor('0xeeeeee')
p_out.zoomTo()
p_out.insert('m13_2h2o_out')

In [7]:
m_inout = open('/home/gosia/work/work_on_gitlab/icho/calcs/m13-2h2o/m13_2h2o_inout.xyz', 'r').read()
p_inout = py3Dmol.view(width=300,height=300)
p_inout.addModel(m_inout,'xyz')
p_inout.setStyle({'stick':{'radius':'0.15'}})
p_inout.setBackgroundColor('0xeeeeee')
p_inout.zoomTo()
p_inout.insert('m13_2h2o_inout')

### Conformers generated with the Balloon software:

In both cases the Balloon software was asked to generate 100 conformers using the genertic algorithm with default settings (only "maxPostprocessIter" increased to 150 and "nGenerations" to 300).

Below we will present aligned structures of macrocycle M1 conformers in "M1 + H2O" (without showing H2O molecules).

In [8]:
inps_m13_2h2o_in_b  = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m13-2h2o/balloon/results_starting_from_m13_2h2o_in/*.sdf')
inps_m13_2h2o_out_b = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m13-2h2o/balloon/results_starting_from_m13_2h2o_out/*.sdf')
inps_m13_2h2o_inout_b = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m13-2h2o/balloon/results_starting_from_m13_2h2o_inout/*.sdf')

In [9]:
e_m13_2h2o_in_b  = grep_energies_from_sdf_outputs(inps_m13_2h2o_in_b)
e_m13_2h2o_out_b = grep_energies_from_sdf_outputs(inps_m13_2h2o_out_b)
e_m13_2h2o_inout_b = grep_energies_from_sdf_outputs(inps_m13_2h2o_inout_b)

In [52]:
%%html
<table>
  <tr>
    <td id="m13_2h2o_in_b" ></td>
    <td id="m13_2h2o_out_b"  ></td>
    <td id="m13_2h2o_inout_b"  ></td>    
  <tr>
    <td> m13_2h2o_in_b </td>
    <td> m13_2h2o_out_b  </td>  
    <td> m13_2h2o_inout_b  </td>     
  </tr>
</table>

0,1,2
,,
m13_2h2o_in_b,m13_2h2o_out_b,m13_2h2o_inout_b


In [53]:
# write conformers to dictionaries
allmol_m13_2h2o_in_b  = {}
allmol_m13_2h2o_out_b = {}
allmol_m13_2h2o_inout_b = {}
suppl_m13_2h2o_in_b  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m13-2h2o/balloon/m13_2h2o_in_result.sdf')
suppl_m13_2h2o_out_b = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m13-2h2o/balloon/m13_2h2o_out_result.sdf')
suppl_m13_2h2o_inout_b = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m13-2h2o/balloon/m13_2h2o_inout_result.sdf')

for i, mol in enumerate(suppl_m13_2h2o_in_b):
    name = "m13_2h2o_in_b_" + str(i)
    allmol_m13_2h2o_in_b[name] = mol
for i, mol in enumerate(suppl_m13_2h2o_out_b):
    name = "m13_2h2o_out_b_" + str(i)
    allmol_m13_2h2o_out_b[name] = mol 
for i, mol in enumerate(suppl_m13_2h2o_inout_b):
    name = "m13_2h2o_inout_b_" + str(i)
    allmol_m13_2h2o_inout_b[name] = mol     

In [54]:
# align:
for key, mol in allmol_m13_2h2o_in_b.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m13_crystal,atomMap=list(zip(core_mol,core_m13)))
    
for key, mol in allmol_m13_2h2o_out_b.items():   
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m13_crystal,atomMap=list(zip(core_mol,core_m13)))   
    
for key, mol in allmol_m13_2h2o_inout_b.items():   
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m13_crystal,atomMap=list(zip(core_mol,core_m13)))      

In [55]:
# view:
p_b_in = py3Dmol.view(width=300,height=300)
for key, mol in allmol_m13_2h2o_in_b.items():
    mb = Chem.MolToMolBlock(mol)
    p_b_in.addModel(mb,'sdf')
p_b_in.setStyle({'stick':{'radius':'0.15'}})
p_b_in.setBackgroundColor('0xeeeeee')
p_b_in.zoomTo()    

p_b_out = py3Dmol.view(width=300,height=300)
for key, mol in allmol_m13_2h2o_out_b.items():
    mb = Chem.MolToMolBlock(mol)
    p_b_out.addModel(mb,'sdf')
p_b_out.setStyle({'stick':{'radius':'0.15'}})
p_b_out.setBackgroundColor('0xeeeeee')
p_b_out.zoomTo()    

p_b_inout = py3Dmol.view(width=300,height=300)
for key, mol in allmol_m13_2h2o_inout_b.items():
    mb = Chem.MolToMolBlock(mol)
    p_b_inout.addModel(mb,'sdf')
p_b_inout.setStyle({'stick':{'radius':'0.15'}})
p_b_inout.setBackgroundColor('0xeeeeee')
p_b_inout.zoomTo()    

<py3Dmol.view at 0x7f588843e0b8>

In [56]:
p_b_in.insert('m13_2h2o_in_b')

In [57]:
p_b_out.insert('m13_2h2o_out_b')

In [58]:
p_b_inout.insert('m13_2h2o_inout_b')

In [72]:
allmol_m13_2h2o_b = {}
allmol_m13_2h2o_b.update(allmol_m13_2h2o_in_b)
allmol_m13_2h2o_b.update(allmol_m13_2h2o_out_b)
allmol_m13_2h2o_b.update(allmol_m13_2h2o_inout_b)

with open("/home/gosia/work/work_on_gitlab/icho/calcs/m13-2h2o/list_selected_conformers_from_balloon", "w") as f:
    for key, mol in allmol_m13_2h2o_b.items():
        f.write(key+"\n")

### Conformers generated with the RDKit software:

In [59]:
inps_m13_2h2o_in_rdkit  = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m13-2h2o/rdkit/results_starting_from_m13_2h2o_in/*.sdf')
inps_m13_2h2o_out_rdkit = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m13-2h2o/rdkit/results_starting_from_m13_2h2o_out/*.sdf')
inps_m13_2h2o_inout_rdkit = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m13-2h2o/rdkit/results_starting_from_m13_2h2o_inout/*.sdf')

In [60]:
e_m13_2h2o_in_rdkit = grep_energies_from_sdf_outputs(inps_m13_2h2o_in_rdkit)
e_m13_2h2o_out_rdkit = grep_energies_from_sdf_outputs(inps_m13_2h2o_out_rdkit)
e_m13_2h2o_inout_rdkit = grep_energies_from_sdf_outputs(inps_m13_2h2o_inout_rdkit)

In [61]:
# write conformers to dictionaries
allmol_m13_2h2o_in_rdkit = {}
suppl_m13_2h2o_in_rdkit  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m13-2h2o/rdkit/m13_2h2o_in_result.sdf')
allmol_m13_2h2o_out_rdkit = {}
suppl_m13_2h2o_out_rdkit  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m13-2h2o/rdkit/m13_2h2o_out_result.sdf')
allmol_m13_2h2o_inout_rdkit = {}
suppl_m13_2h2o_inout_rdkit  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m13-2h2o/rdkit/m13_2h2o_inout_result.sdf')

for i, mol in enumerate(suppl_m13_2h2o_in_rdkit):
    name = "m13_2h2o_in_rdkit_" + str(i)
    allmol_m13_2h2o_in_rdkit[name] = mol  
for i, mol in enumerate(suppl_m13_2h2o_out_rdkit):
    name = "m13_2h2o_out_rdkit_" + str(i)
    allmol_m13_2h2o_out_rdkit[name] = mol  
for i, mol in enumerate(suppl_m13_2h2o_inout_rdkit):
    name = "m13_2h2o_inout_rdkit_" + str(i)
    allmol_m13_2h2o_inout_rdkit[name] = mol    

In [62]:
# align:
for key, mol in allmol_m13_2h2o_in_rdkit.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m13_crystal,atomMap=list(zip(core_mol,core_m13)))
    
for key, mol in allmol_m13_2h2o_out_rdkit.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles)) 
    AllChem.AlignMol(mol,m13_crystal,atomMap=list(zip(core_mol,core_m13)))
    
for key, mol in allmol_m13_2h2o_inout_rdkit.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles)) 
    AllChem.AlignMol(mol,m13_crystal,atomMap=list(zip(core_mol,core_m13)))    

In [63]:
%%html
<table>
  <tr>
    <td id="m13_2h2o_in_rdkit" ></td>
    <td id="m13_2h2o_out_rdkit"  ></td>
    <td id="m13_2h2o_inout_rdkit"  ></td>    
  <tr>
    <td> m13_2h2o_in_rdkit </td>
    <td> m13_2h2o_out_rdkit  </td>  
    <td> m13_2h2o_niout_rdkit  </td>    
  </tr>
</table>

0,1,2
,,
m13_2h2o_in_rdkit,m13_2h2o_out_rdkit,m13_2h2o_niout_rdkit


In [64]:
# view:
p_2h2o_rdkit_handles=[]

p_2h2o_in_rdkit = py3Dmol.view(width=300,height=300)
p_2h2o_in_rdkit.removeAllModels()
for key, mol in allmol_m13_2h2o_in_rdkit.items(): 
    mb = Chem.MolToMolBlock(mol)
    p_2h2o_in_rdkit.addModel(mb,'sdf')    
p_2h2o_in_rdkit.setStyle({'stick':{'radius':'0.15'}})
p_2h2o_in_rdkit.setBackgroundColor('0xeeeeee')
p_2h2o_in_rdkit.zoomTo()
p_2h2o_rdkit_handles.append(p_2h2o_in_rdkit)

p_2h2o_out_rdkit = py3Dmol.view(width=300,height=300)
p_2h2o_out_rdkit.removeAllModels()
for key, mol in allmol_m13_2h2o_out_rdkit.items(): 
    mb = Chem.MolToMolBlock(mol)
    p_2h2o_out_rdkit.addModel(mb,'sdf')    
p_2h2o_out_rdkit.setStyle({'stick':{'radius':'0.15'}})
p_2h2o_out_rdkit.setBackgroundColor('0xeeeeee')
p_2h2o_out_rdkit.zoomTo()
p_2h2o_rdkit_handles.append(p_2h2o_out_rdkit)

p_2h2o_inout_rdkit = py3Dmol.view(width=300,height=300)
p_2h2o_inout_rdkit.removeAllModels()
for key, mol in allmol_m13_2h2o_inout_rdkit.items(): 
    mb = Chem.MolToMolBlock(mol)
    p_2h2o_inout_rdkit.addModel(mb,'sdf')    
p_2h2o_inout_rdkit.setStyle({'stick':{'radius':'0.15'}})
p_2h2o_inout_rdkit.setBackgroundColor('0xeeeeee')
p_2h2o_inout_rdkit.zoomTo()
p_2h2o_rdkit_handles.append(p_2h2o_inout_rdkit)

In [65]:
p_2h2o_rdkit_handles[0].insert('m13_2h2o_in_rdkit')

In [66]:
p_2h2o_rdkit_handles[1].insert('m13_2h2o_out_rdkit')

In [67]:
p_2h2o_rdkit_handles[2].insert('m13_2h2o_inout_rdkit')

### pre-screening

In [68]:
allmol_m13_2h2o_rdkit = {}
allmol_m13_2h2o_rdkit.update(allmol_m13_2h2o_in_rdkit)
allmol_m13_2h2o_rdkit.update(allmol_m13_2h2o_out_rdkit)
allmol_m13_2h2o_rdkit.update(allmol_m13_2h2o_inout_rdkit)

energy_m13_2h2o_rdkit = {}
energy_m13_2h2o_rdkit.update(e_m13_2h2o_in_rdkit)
energy_m13_2h2o_rdkit.update(e_m13_2h2o_out_rdkit)
energy_m13_2h2o_rdkit.update(e_m13_2h2o_inout_rdkit)

rms_m13_2h2o_rdkit = {}
for key, mol in allmol_m13_2h2o_rdkit.items():
    rms_m13_2h2o_rdkit[key] = AllChem.GetBestRMS(Chem.RemoveHs(mol),Chem.RemoveHs(m13_crystal))
    #print("name = {}, E = {:.6f}, RMS = {:.6f}".format(key, energy_m1_rdkit[key], rms_m1_rdkit[key]))

In [69]:
rms_sorted = sorted(rms_m13_2h2o_rdkit.items(), key=lambda x: x[1])
rms_thresh = 0.05

# now compare RMS of each pair, if the structures are too similar then delete the one with the higher energy
to_be_deleted = find_duplicates(rms_sorted, energy_m13_2h2o_rdkit, rms_thresh)

for mol in to_be_deleted:
    del allmol_m13_2h2o_rdkit[mol]
    del energy_m13_2h2o_rdkit[mol]
    del rms_m13_2h2o_rdkit[mol] 

Conformers which will be deleted:
['m13_2h2o_inout_rdkit_89', 'm13_2h2o_in_rdkit_89', 'm13_2h2o_in_rdkit_6', 'm13_2h2o_out_rdkit_6', 'm13_2h2o_out_rdkit_4', 'm13_2h2o_inout_rdkit_4', 'm13_2h2o_in_rdkit_4', 'm13_2h2o_inout_rdkit_22', 'm13_2h2o_out_rdkit_22', 'm13_2h2o_out_rdkit_62', 'm13_2h2o_inout_rdkit_62', 'm13_2h2o_in_rdkit_62', 'm13_2h2o_in_rdkit_13', 'm13_2h2o_inout_rdkit_13', 'm13_2h2o_out_rdkit_13', 'm13_2h2o_in_rdkit_22', 'm13_2h2o_in_rdkit_77', 'm13_2h2o_inout_rdkit_77', 'm13_2h2o_out_rdkit_24', 'm13_2h2o_inout_rdkit_24', 'm13_2h2o_out_rdkit_65', 'm13_2h2o_inout_rdkit_65', 'm13_2h2o_out_rdkit_7', 'm13_2h2o_in_rdkit_7', 'm13_2h2o_inout_rdkit_7', 'm13_2h2o_in_rdkit_28', 'm13_2h2o_out_rdkit_28', 'm13_2h2o_inout_rdkit_28', 'm13_2h2o_in_rdkit_10', 'm13_2h2o_inout_rdkit_10', 'm13_2h2o_out_rdkit_10', 'm13_2h2o_inout_rdkit_83', 'm13_2h2o_in_rdkit_83', 'm13_2h2o_in_rdkit_49', 'm13_2h2o_inout_rdkit_49', 'm13_2h2o_out_rdkit_49', 'm13_2h2o_inout_rdkit_33', 'm13_2h2o_in_rdkit_33', 'm13_2h2

In [70]:
for key, mol in allmol_m13_2h2o_rdkit.items():
    core_mol = mol.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))
    AllChem.AlignMol(mol,m13_crystal,atomMap=list(zip(core_mol,core_m13)))
    
p_r = py3Dmol.view(width=400,height=400)
for key, mol in allmol_m13_2h2o_rdkit.items():
    mb = Chem.MolToMolBlock(mol)
    p_r.addModel(mb,'sdf')
p_r.setStyle({'stick':{'radius':'0.15'}})
p_r.setBackgroundColor('0xeeeeee')
p_r.zoomTo()
p_r.show()

In [71]:
with open("/home/gosia/work/work_on_gitlab/icho/calcs/m13-2h2o/list_selected_conformers_from_rdkit", "w") as f:
    for key, mol in allmol_m13_2h2o_rdkit.items():
        f.write(key+"\n")

### Summary