# Conformers of M1 generated with the RDKit software

In [1]:
from IPython.display import HTML

# Emit a small script plus a button into the notebook page.
# code_toggle() hides or shows every 'div.input' element and then flips the
# code_show flag; it is also registered to run once when the document is ready,
# so the inputs start out hidden and the button brings them back.
# NOTE(review): the script relies on `$` (jQuery) being available in the page — confirm
# for the front end this notebook is rendered in.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [2]:
import glob
import py3Dmol

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
%matplotlib inline 

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem import rdMolAlign
from rdkit.Chem.Draw import IPythonConsole
from rdkit import rdBase
# Provenance: print the RDKit version and the current local time of this run.
print(rdBase.rdkitVersion)
# `os` is used by grep_energies_from_sdf_outputs to derive names from file paths.
import os,time
print( time.asctime())

2016.09.4
Mon May 29 14:50:16 2017


In [3]:
# Functions used in this notebook:

def grep_energies_from_sdf_outputs(files):
    """
    Collect one energy value per SDF output file.

    For every path in `files` the file is read line by line; whenever a line
    contains "M  END", the line directly after it is converted to float and
    stored under the file's base name with the extension removed. If a file
    contains several such markers the value following the last one is kept;
    a file without the marker contributes no entry.

    Returns a dict mapping base name -> energy (float).
    """
    marker = "M  END"
    collected = {}
    for path in files:
        with open(path, 'r') as handle:
            content = handle.readlines()
        for idx, text in enumerate(content):
            if marker not in text:
                continue
            # the energy is expected on the line immediately following the marker
            value = float(content[idx + 1])
            stem = os.path.splitext(os.path.basename(path))[0]
            collected[stem] = value
    return collected

def write_to_dict(prefix, suppl):
    """
    Label the items of `suppl` and return them as a dictionary.

    Each item is stored under the key `prefix` + its running index
    (starting at 0), in iteration order.
    """
    return {prefix + str(position): entry for position, entry in enumerate(suppl)}

def align_structures_to_lowest_energy(moldict, energy_dict):
    """
    Align every structure in `moldict` onto the lowest-energy structure.

    The reference is not arbitrary: it is the entry of `energy_dict` with the
    smallest value, and its key is looked up in `moldict`. All structures
    (the reference included) are then superimposed on it with
    AllChem.AlignMol, pairing the atoms returned by GetSubstructMatch for the
    notebook-level `core_smiles` pattern in each structure with those matched
    in the reference.

    Parameters:
        moldict:     dict, name -> RDKit molecule; the molecules are passed to
                     AlignMol as the probe, nothing is returned
        energy_dict: dict, name -> energy; names must be comparable to the
                     keys of `moldict`

    Raises:
        IndexError: if `energy_dict` is empty
        KeyError:   if the lowest-energy name is not a key of `moldict`
    """
    energy_sorted = sorted(energy_dict.items(), key=lambda x: x[1])
    first = energy_sorted[0][0]
    reference = moldict[first]

    # `core_smiles` is defined at notebook level; parse it once instead of
    # once per structure.
    core_query = Chem.MolFromSmiles(core_smiles)
    core_first = reference.GetSubstructMatch(core_query)

    for mol in moldict.values():
        core_mol = mol.GetSubstructMatch(core_query)
        # atomMap pairs (probe atom index, reference atom index)
        AllChem.AlignMol(mol, reference, atomMap=list(zip(core_mol, core_first)))

def prepare_view(moldict):
    """
    Build a 400x400 py3Dmol view holding every molecule of `moldict`.

    Each molecule is converted to a mol block and added as an 'sdf' model;
    the view is then given a thin stick style, a light grey background and is
    zoomed to fit. The view object is returned without being displayed here.
    """
    viewer = py3Dmol.view(width=400,height=400)
    for structure in moldict.values():
        viewer.addModel(Chem.MolToMolBlock(structure),'sdf')
    viewer.setStyle({'stick':{'radius':'0.15'}})
    viewer.setBackgroundColor('0xeeeeee')
    viewer.zoomTo()
    return viewer

In [4]:
# "core" is a part of a molecule, which we wish to be the "most-aligned" among multiple conformers
# `smiles` describes the whole M1 molecule; `core_smiles` is the substructure pattern
# that align_structures_to_lowest_energy reads as a notebook-level variable.
smiles      = 'O=C1NCCNC(=O)c2nc(C(=O)NCCNC(=O)c3nc1ccc3)ccc2'
core_smiles = 'n1ccccc1'

# Build M1 from its SMILES with explicit hydrogens and find the atoms matching the core.
m1 = Chem.AddHs(Chem.MolFromSmiles(smiles))
core_m1 = m1.GetSubstructMatch(Chem.MolFromSmiles(core_smiles))

# Crystal geometry of M1: the first record of the SDF file.
# NOTE(review): absolute, machine-specific path — the notebook will not run elsewhere
# unless this location exists; consider a configurable base directory.
templ_m1 = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1/m1_crystal.sdf')
m1_crystal = templ_m1[0]

Conformers were generated using the distance geometry algorithm as implemented in the RDKit software:

* starting with the crystal geometry kept as a template, results with prefix: "m1_b_sdf"; the crystal is of the "ss-ss" type;

* starting with the SMILES signature of M1, results with prefix: "m1_b_smi"

* starting with structures generated in Avogadro (derived from the crystal geometry and pre-optimized) of the following types:
    * "ss_sa" type
    * "ss_aa" type
    * "sa_sa" type
    * "sa_as" type
    * "sa_aa" type
    * "aa_aa" type    

    where "ss\_sa" means "(syn-syn)\_(syn-anti)" configuration, etc. with the bracket notation used to mark conformations around the rings.


* the geometries of generated conformers were then pre-optimized with the MM methods (using UFF force field).

In all cases the RDKit software was asked to generate 100 conformers using the distance geometry algorithm with default settings, except that "pruneRmsThresh" was set to 1.0 in "AllChem.EmbedMultipleConfs" and "maxIters" was set to 500 in "AllChem.UFFOptimizeMolecule".

In [5]:
# List the *.sdf files in the results directory of each conformer set
# (one list per starting geometry); these lists are fed to
# grep_energies_from_sdf_outputs in the next cell.
# NOTE(review): absolute, machine-specific paths — confirm they exist before re-running.
inps_m1_rdkit_smi = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/results_crystal_from_smiles/*.sdf')
inps_m1_rdkit_sdf = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/results_crystal_from_sdf/*.sdf')
inps_m1_rdkit_ss_sa = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/results_from_m1_ss_sa/*.sdf')
inps_m1_rdkit_ss_aa = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/results_from_m1_ss_aa/*.sdf')
inps_m1_rdkit_sa_sa = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/results_from_m1_sa_sa/*.sdf')
inps_m1_rdkit_sa_as = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/results_from_m1_sa_as/*.sdf')
inps_m1_rdkit_sa_aa = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/results_from_m1_sa_aa/*.sdf')
inps_m1_rdkit_aa_aa = glob.glob('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/results_from_m1_aa_aa/*.sdf')

In [6]:
# For each conformer set, build a dict mapping file base name (no extension)
# to the energy read from the line following "M  END" in that file.
e_m1_rdkit_smi = grep_energies_from_sdf_outputs(inps_m1_rdkit_smi)
e_m1_rdkit_sdf = grep_energies_from_sdf_outputs(inps_m1_rdkit_sdf)
e_m1_rdkit_ss_sa = grep_energies_from_sdf_outputs(inps_m1_rdkit_ss_sa)
e_m1_rdkit_ss_aa = grep_energies_from_sdf_outputs(inps_m1_rdkit_ss_aa)
e_m1_rdkit_sa_sa = grep_energies_from_sdf_outputs(inps_m1_rdkit_sa_sa)
e_m1_rdkit_sa_as = grep_energies_from_sdf_outputs(inps_m1_rdkit_sa_as)
e_m1_rdkit_sa_aa = grep_energies_from_sdf_outputs(inps_m1_rdkit_sa_aa)
e_m1_rdkit_aa_aa = grep_energies_from_sdf_outputs(inps_m1_rdkit_aa_aa)

In [7]:
%%html
<table>
  <tr>
    <td id="m1_rdkit_sdf" ></td>
    <td id="m1_rdkit_smi"  ></td>
  </tr>
  <tr>
    <td> m1_rdkit_sdf (start: crystal; "ss_ss") </td>
    <td> m1_rdkit_smi (start: 2D smiles string) </td>
  </tr>
  <tr>
    <td id="m1_rdkit_ss_sa" ></td>
    <td id="m1_rdkit_ss_aa"  ></td>
  </tr>
  <tr>
    <td> m1_rdkit_ss_sa </td>
    <td> m1_rdkit_ss_aa  </td>
  </tr>
  <tr>
    <td id="m1_rdkit_sa_sa" ></td>
    <td id="m1_rdkit_sa_as"  ></td>
  </tr>
  <tr>
    <td> m1_rdkit_sa_sa </td>
    <td> m1_rdkit_sa_as  </td>
  </tr>
  <tr>
    <td id="m1_rdkit_sa_aa" ></td>
    <td id="m1_rdkit_aa_aa"  ></td>
  </tr>
  <tr>
    <td> m1_rdkit_sa_aa </td>
    <td> m1_rdkit_aa_aa  </td>
  </tr>
</table>

0,1
,
"m1_rdkit_sdf (start: crystal; ""ss_ss"")",m1_rdkit_smi (start: 2D smiles string)
m1_rdkit_ss_sa,m1_rdkit_ss_aa
m1_rdkit_sa_sa,m1_rdkit_sa_as
m1_rdkit_sa_aa,m1_rdkit_aa_aa


In [8]:
# Open one SDF supplier per conformer set and store its molecules in a dictionary
# keyed "<prefix><index>", with the index counting from 0 in file order.
# NOTE(review): align_structures_to_lowest_energy looks up a key of the matching
# energy dictionary (a file base name) in these dictionaries, so the names built
# here presumably mirror the file names in the results directories — confirm.
# NOTE(review): absolute, machine-specific paths.
suppl_m1_rdkit_smi  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/result_smiles.sdf')
suppl_m1_rdkit_sdf  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/result_sdf.sdf')
suppl_m1_rdkit_ss_sa  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/result_m1_ss_sa.sdf')
suppl_m1_rdkit_ss_aa  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/result_m1_ss_aa.sdf')
suppl_m1_rdkit_sa_sa  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/result_m1_sa_sa.sdf')
suppl_m1_rdkit_sa_as  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/result_m1_sa_as.sdf')
suppl_m1_rdkit_sa_aa  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/result_m1_sa_aa.sdf')
suppl_m1_rdkit_aa_aa  = Chem.SDMolSupplier('/home/gosia/work/work_on_gitlab/icho/calcs/m1/rdkit/result_m1_aa_aa.sdf')

allmol_m1_rdkit_smi = write_to_dict("m1_rdkit_smi_", suppl_m1_rdkit_smi)
allmol_m1_rdkit_sdf = write_to_dict("m1_rdkit_sdf_", suppl_m1_rdkit_sdf)
allmol_m1_rdkit_ss_sa = write_to_dict("m1_rdkit_ss_sa_", suppl_m1_rdkit_ss_sa)
allmol_m1_rdkit_ss_aa = write_to_dict("m1_rdkit_ss_aa_", suppl_m1_rdkit_ss_aa)
allmol_m1_rdkit_sa_sa = write_to_dict("m1_rdkit_sa_sa_", suppl_m1_rdkit_sa_sa)
allmol_m1_rdkit_sa_as = write_to_dict("m1_rdkit_sa_as_", suppl_m1_rdkit_sa_as)
allmol_m1_rdkit_sa_aa = write_to_dict("m1_rdkit_sa_aa_", suppl_m1_rdkit_sa_aa)
allmol_m1_rdkit_aa_aa = write_to_dict("m1_rdkit_aa_aa_", suppl_m1_rdkit_aa_aa)

In [9]:
# align: within each conformer set, superimpose all structures on the set's
# lowest-energy structure, using the atoms matched by `core_smiles`.
# Each call pairs a molecule dictionary with the energy dictionary of the same set.
align_structures_to_lowest_energy(allmol_m1_rdkit_sdf, e_m1_rdkit_sdf)
align_structures_to_lowest_energy(allmol_m1_rdkit_smi, e_m1_rdkit_smi)
align_structures_to_lowest_energy(allmol_m1_rdkit_ss_sa, e_m1_rdkit_ss_sa)
align_structures_to_lowest_energy(allmol_m1_rdkit_ss_aa, e_m1_rdkit_ss_aa)
align_structures_to_lowest_energy(allmol_m1_rdkit_sa_sa, e_m1_rdkit_sa_sa)
align_structures_to_lowest_energy(allmol_m1_rdkit_sa_as, e_m1_rdkit_sa_as)
align_structures_to_lowest_energy(allmol_m1_rdkit_sa_aa, e_m1_rdkit_sa_aa)
align_structures_to_lowest_energy(allmol_m1_rdkit_aa_aa, e_m1_rdkit_aa_aa)

In [10]:
# view: one viewer per conformer set; the list position is what the
# following cells use to pick the viewer for each table cell.
p1_rdkit_handles = [
    prepare_view(conformer_set)
    for conformer_set in (
        allmol_m1_rdkit_sdf,
        allmol_m1_rdkit_smi,
        allmol_m1_rdkit_ss_sa,
        allmol_m1_rdkit_ss_aa,
        allmol_m1_rdkit_sa_sa,
        allmol_m1_rdkit_sa_as,
        allmol_m1_rdkit_sa_aa,
        allmol_m1_rdkit_aa_aa,
    )
]

In [11]:
# Place viewer 0 (built from allmol_m1_rdkit_sdf) into the page element with id 'm1_rdkit_sdf'.
p1_rdkit_handles[0].insert('m1_rdkit_sdf')

In [12]:
# Place viewer 1 (built from allmol_m1_rdkit_smi) into the page element with id 'm1_rdkit_smi'.
p1_rdkit_handles[1].insert('m1_rdkit_smi')

In [13]:
# Place viewer 2 (built from allmol_m1_rdkit_ss_sa) into the page element with id 'm1_rdkit_ss_sa'.
p1_rdkit_handles[2].insert('m1_rdkit_ss_sa')

In [14]:
# Place viewer 3 (built from allmol_m1_rdkit_ss_aa) into the page element with id 'm1_rdkit_ss_aa'.
p1_rdkit_handles[3].insert('m1_rdkit_ss_aa')

In [15]:
# Place viewer 4 (built from allmol_m1_rdkit_sa_sa) into the page element with id 'm1_rdkit_sa_sa'.
p1_rdkit_handles[4].insert('m1_rdkit_sa_sa')

In [16]:
# Place viewer 5 (built from allmol_m1_rdkit_sa_as) into the page element with id 'm1_rdkit_sa_as'.
p1_rdkit_handles[5].insert('m1_rdkit_sa_as')

In [17]:
# Place viewer 6 (built from allmol_m1_rdkit_sa_aa) into the page element with id 'm1_rdkit_sa_aa'.
p1_rdkit_handles[6].insert('m1_rdkit_sa_aa')

In [18]:
# Place viewer 7 (built from allmol_m1_rdkit_aa_aa) into the page element with id 'm1_rdkit_aa_aa'.
p1_rdkit_handles[7].insert('m1_rdkit_aa_aa')