# Exploring conformational space of selected macrocycles - "M7", Part 2

In this notebook we present and analyze selected structures; technical notes are available [here](https://www.gitlab.com/user/gosia/icho).

In [3]:
from IPython.display import HTML

# Inject a small jQuery snippet plus a button into the rendered notebook.
# `code_toggle` hides or shows every `div.input` element and flips the
# `code_show` flag; it is also registered as the document-ready handler, so the
# inputs are hidden once when the page finishes loading.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [1]:
import glob
import py3Dmol

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem import rdMolAlign
from rdkit.Chem.Draw import IPythonConsole
from rdkit import rdBase
# Print the RDKit version string and the current local time, so the saved
# output shows which RDKit build produced it and when the notebook was run.
print(rdBase.rdkitVersion)
import os,time
print( time.asctime())

2016.09.4
Tue Apr 11 10:10:23 2017


In [2]:
# Functions used in this notebook:
def grep_energies_from_csvfile(fcsv):
    """Read conformer energies from a text file into a dictionary.

    Every non-blank line is split on whitespace: the first field is the
    conformer label and the second field is its energy. The label is
    shortened with ``rsplit("_", 5)[0]``, i.e. up to five trailing
    "_"-separated tokens are dropped, which is the same key construction
    used by ``get_moldict_from_inplist``.

    NOTE(review): despite the "csv" in the name, fields are split on
    whitespace, not on commas -- confirm this matches the energy files.

    Parameters
    ----------
    fcsv : str
        Path to the energy file.

    Returns
    -------
    dict
        Shortened conformer name -> energy (float). If two lines shorten to
        the same name, the later line wins.

    Raises
    ------
    IndexError
        If a non-blank line has fewer than two fields.
    ValueError
        If the second field cannot be converted to float.
    """
    energies = {}
    with open(fcsv, 'r') as f:
        # Iterate the file lazily instead of loading it with readlines().
        for line in f:
            words = line.split()
            if not words:
                # Blank line (e.g. a trailing newline at the end of the file).
                continue
            name = words[0].rsplit("_", 5)[0]
            energies[name] = float(words[1])
    return energies

def find_duplicates(rms_sorted, energy, rms_thresh):
    """Select near-duplicate conformers to discard, keeping the lower-energy one.

    The list is scanned in order. For each conformer ``i`` that is still kept,
    every later kept conformer ``j`` is examined until the difference between
    their stored RMS values reaches ``rms_thresh`` (the list is sorted, so no
    later ``j`` can be closer). Within the threshold, the conformer with the
    higher energy is marked for deletion; on an energy tie the earlier
    conformer ``i`` is the one marked. Once ``i`` itself is marked, the scan
    moves on to the next kept conformer.

    Only the difference of the two stored RMS values is compared; no pairwise
    RMSD between the two conformers is computed here.

    Parameters
    ----------
    rms_sorted : sequence of (name, rms)
        Pairs sorted by ascending ``rms``.
    energy : mapping
        Conformer name -> energy; must contain every name that is compared.
    rms_thresh : float
        Two conformers are treated as duplicates when ``rms_j - rms_i`` is
        strictly smaller than this value.

    Returns
    -------
    list
        Names marked for deletion, in the order they were marked. The list is
        also printed when it is not empty.
    """
    to_be_deleted = []
    # Set mirror of to_be_deleted for O(1) membership tests.
    marked = set()
    n_conf = len(rms_sorted)

    for i in range(n_conf):
        name_i, rms_i = rms_sorted[i][0], rms_sorted[i][1]
        if name_i in marked:
            continue
        for j in range(i + 1, n_conf):
            name_j, rms_j = rms_sorted[j][0], rms_sorted[j][1]
            if name_j in marked:
                continue
            if (rms_j - rms_i) >= rms_thresh:
                # Sorted input: every later conformer is at least this far away.
                break
            if energy[name_i] < energy[name_j]:
                marked.add(name_j)
                to_be_deleted.append(name_j)
            else:
                marked.add(name_i)
                to_be_deleted.append(name_i)
                # i is gone; the outer loop restarts the comparison from the
                # next kept conformer against all of its own successors.
                break

    if to_be_deleted:
        print("Conformers which will be deleted:")    
        print(to_be_deleted)
    return to_be_deleted

def get_moldict_from_inplist(inplist):
    """Load molecules from a list of files into a name-keyed dictionary.

    Each path is read with ``Chem.MolFromMolFile``. The dictionary key is the
    file name without directory and extension, shortened with
    ``rsplit("_", 5)[0]`` (up to five trailing "_"-separated tokens dropped).
    If two files shorten to the same key, the later one replaces the earlier.

    Parameters
    ----------
    inplist : iterable of str
        Paths of the structure files to read.

    Returns
    -------
    dict
        Shortened conformer name -> RDKit molecule.

    Raises
    ------
    ValueError
        If RDKit returns no molecule for a file. Failing here names the
        offending file instead of storing ``None`` and breaking later on the
        first method call.
    """
    moldict = {}
    for inp in inplist:
        mol = Chem.MolFromMolFile(inp)
        if mol is None:
            raise ValueError("RDKit could not read a molecule from {}".format(inp))
        stem = os.path.basename(os.path.splitext(inp)[0])
        moldict[stem.rsplit("_", 5)[0]] = mol
    return moldict

def check_freq(freq_inp_list):
    """Count negative frequency values in each frequency file.

    Every non-blank line is split on whitespace and its second field is read
    as the frequency value. Files with at least one negative value are
    reported under a key built from the file name (directory and extension
    removed, then ``rsplit("_", 5)[0]``).

    NOTE(review): the layout "second whitespace-separated field is the
    frequency" is taken from the original indexing -- confirm against the
    actual frequency files.

    Parameters
    ----------
    freq_inp_list : iterable of str
        Paths of the frequency files to inspect.

    Returns
    -------
    dict
        Shortened conformer name -> number of negative frequencies. Files
        without negative frequencies are not included.

    Raises
    ------
    IndexError
        If a non-blank line has fewer than two fields.
    ValueError
        If the second field cannot be converted to float.
    """
    negative_freq_dict = {}
    for inp in freq_inp_list:
        count = 0
        with open(inp, "r") as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Skip blank lines such as a trailing newline.
                    continue
                if float(fields[1]) < 0.0:
                    count += 1
        if count > 0:
            name = os.path.basename(os.path.splitext(inp)[0]).rsplit("_", 5)[0]
            negative_freq_dict[name] = count
    return negative_freq_dict

In [3]:
# Decide what the "core" is: the part of the molecule that we want to be best
# aligned (RMSD-wise) across all structures.

smiles = 'N1C(=O)c2nc(C(=O)NCCCNC(=O)c3nc(C(=O)NCCC1)ccc3)ccc2'
core_smiles = 'C(=O)c1nc(C=O)ccc1'

m7 = Chem.AddHs(Chem.MolFromSmiles(smiles))

# Crystal structure; used below as the reference molecule for every alignment.
templ_m7 = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m7/balloon/m7_crystal.sdf')
m7_crystal = templ_m7[0]

# The core indices are used as the reference side of the atomMap passed to
# AlignMol(mol, m7_crystal, ...), so they have to index atoms of m7_crystal.
# Indices taken from the SMILES-built `m7` are not guaranteed to follow the
# atom order of the SDF file.
core_m7 = m7_crystal.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))

## Geometry optimization in Gaussian 09 software

### Isolated "M7" molecule in the gas phase - computational setup

We will use a few models to determine the geometry and energy of the M7 conformers. All calculations are performed with Gaussian 09.

| Abbrev. | XC Model | Basis set | "Geometry" keywords | "Freq" keywords |
|---------|----------|-----------|---------------------|-----------------|
| s1 | PBE | 6-31G(d) |`#p PBEh1PBE/6-31G(d') opt test nosymm pop=full`|`#p PBEh1PBE/6-31G(d') freq test geom=check guess=read`|
| s2 | B97D | 6-31G(d) | `#p B97D/6-31G(d') opt test nosymm pop=full` | `#p B97D/6-31G(d') freq test geom=check guess=read` |
| s3 | B97D | cc-pVTZ |                     |                 |




### Isolated "M7" molecule in the gas phase - results

In [4]:
# Collect the Gaussian-optimized structures for the s1 and s2 setups.
inps_m7_gaussian_s1 = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m7/gaussian_results/*pbe_631gd.sdf')
inps_m7_gaussian_s2 = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m7/gaussian_results/*b97d_631gd.sdf')

mol_m7_gaussian_s1 = get_moldict_from_inplist(inps_m7_gaussian_s1)
mol_m7_gaussian_s2 = get_moldict_from_inplist(inps_m7_gaussian_s2)

# Superimpose every s1 conformer on the crystal structure using the core atoms.
core_query = Chem.MolFromSmiles(core_smiles)
for mol in mol_m7_gaussian_s1.values():
    core_mol = mol.GetSubstructMatch(core_query)
    atom_pairs = list(zip(core_mol, core_m7))
    AllChem.AlignMol(mol, m7_crystal, atomMap=atom_pairs)

# Show all aligned s1 conformers together in one 3D viewer.
p = py3Dmol.view(width=400, height=400)
for mol in mol_m7_gaussian_s1.values():
    p.addModel(Chem.MolToMolBlock(mol), 'sdf')
p.setStyle({'stick': {'radius': '0.15'}})
p.setBackgroundColor('0xeeeeee')
p.zoomTo()
p.show()

We will search for potential duplicates (based on RMSD and on energy) and remove them:

In [5]:
# Energies of the s1 conformers, keyed by the shortened conformer name.
f_s1 = "/home/gosia/work/work_on_gitlab/icho/calcs/m7/gaussian_results/energy_pbe_631gd.csv"
e_m7_gaussian_s1 = grep_energies_from_csvfile(f_s1)

# Work on copies, so that removing duplicates later leaves the loaded
# dictionaries untouched.
allmol_m7_s1 = dict(mol_m7_gaussian_s1)
energy_m7_s1 = dict(e_m7_gaussian_s1)

# Best heavy-atom RMS of every conformer with respect to the crystal structure.
crystal_heavy = Chem.RemoveHs(m7_crystal)
rms_m7_s1 = {
    name: AllChem.GetBestRMS(Chem.RemoveHs(conformer), crystal_heavy)
    for name, conformer in allmol_m7_s1.items()
}

In [7]:
rms_thresh = 0.05

# Order the conformers by their RMS with respect to the crystal structure.
rms_sorted_s1 = sorted(rms_m7_s1.items(), key=lambda item: item[1])

print("List sorted by RMS:")
for name, rms in rms_sorted_s1:
    print("name = {}, E = {:.6f}, RMS = {:.6f}".format(name, energy_m7_s1[name], rms))

# Compare neighbouring RMS values; when two structures are too similar, the
# one with the higher energy is marked for deletion.
to_be_deleted_s1 = find_duplicates(rms_sorted_s1, energy_m7_s1, rms_thresh)

# Drop the marked conformers from all three bookkeeping dictionaries.
for name in to_be_deleted_s1:
    for table in (allmol_m7_s1, energy_m7_s1, rms_m7_s1):
        del table[name]

List sorted by RMS:
name = m7_b_sdf_0, E = -1403.447796, RMS = 0.109481
name = m7_b_smi_0, E = -1403.447857, RMS = 0.110671
name = m7_b_sdf_20, E = -1403.447266, RMS = 0.367043
name = m7_b_sdf_6, E = -1403.446338, RMS = 0.552527
name = m7_rdkit_smi_24, E = -1403.414271, RMS = 1.238407
name = m7_rdkit_sdf_66, E = -1403.393926, RMS = 1.606088
name = m7_rdkit_smi_81, E = -1403.406549, RMS = 1.654906
name = m7_rdkit_sdf_104, E = -1403.393027, RMS = 1.863044
name = m7_b_sdf_3, E = -1403.450445, RMS = 1.872205
name = m7_b_sdf_12, E = -1403.450450, RMS = 1.875135
name = m7_b_sdf_2, E = -1403.450440, RMS = 1.935951
name = m7_rdkit_sdf_70, E = -1403.424439, RMS = 2.001788
name = m7_rdkit_smi_94, E = -1403.406371, RMS = 2.101674
name = m7_rdkit_smi_15, E = -1403.399946, RMS = 2.127330
name = m7_rdkit_smi_43, E = -1403.396856, RMS = 2.345021
Conformers which will be deleted:
['m7_b_sdf_0', 'm7_rdkit_sdf_66', 'm7_rdkit_sdf_104', 'm7_b_sdf_3', 'm7_rdkit_smi_15']


We can now present the remaining conformers:

In [8]:
# Number of conformers left after removing the duplicates.
print(len(allmol_m7_s1))

# Re-align the remaining conformers on the crystal structure via the core atoms.
core_query = Chem.MolFromSmiles(core_smiles)
for mol in allmol_m7_s1.values():
    core_mol = mol.GetSubstructMatch(core_query)
    atom_pairs = list(zip(core_mol, core_m7))
    AllChem.AlignMol(mol, m7_crystal, atomMap=atom_pairs)

# Display the remaining conformers together in one 3D viewer.
p = py3Dmol.view(width=400, height=400)
for mol in allmol_m7_s1.values():
    p.addModel(Chem.MolToMolBlock(mol), 'sdf')
p.setStyle({'stick': {'radius': '0.15'}})
p.setBackgroundColor('0xeeeeee')
p.zoomTo()
p.show()

10
