In [1]:
import pandas as pd
import openbabel as ob
from rdkit.Chem.SaltRemover import SaltRemover
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np
import random
from random import randint
from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, GetStereoisomerCount



## Azobenzenes

In [3]:
# Read the azobenzene isomer table; the first CSV row is used as the header.
AZO_SMI = pd.read_csv("Azobenzene_Isomers.csv", header=0, sep=",")
# Extract positional columns 1, 2 and 3 as plain Python lists. The later cells
# parse AZO_TRANS / AZO_CIS entries as SMILES and use AZO_ID in file names.
AZO_TRANS, AZO_CIS, AZO_ID = (
    AZO_SMI.iloc[:, column].tolist() for column in (1, 2, 3)
)

In [3]:
# Number of molecule IDs loaded from the CSV (one list entry per data row).
len(AZO_ID)

14001

In [4]:
# Write one XYZ file per trans-azobenzene: SMILES -> salt-stripped, H-added
# RDKit molecule -> embedded + MMFF-optimised conformer -> Open Babel XYZ text.
obConversion = ob.OBConversion()
obConversion.SetInAndOutFormats("sdf", "xyz")
remover = SaltRemover(defnData="[Cl,Br]")
charges = []  # never filled in this cell; kept in case other cells reference it
skipped = []  # (molecule ID, reason) for every entry that could not be written

# Generate xyz coordinates files.
# Slicing (instead of range(4000)) cannot raise IndexError on a shorter table.
for name, smi in zip(AZO_ID[:4000], AZO_TRANS[:4000]):
    # Missing CSV values arrive as non-strings; treat them like a failed parse.
    m = Chem.MolFromSmiles(smi) if isinstance(smi, str) else None
    if m is None:
        skipped.append((name, 'unparsable SMILES'))
        continue
    # Remove salts and add H
    m2 = Chem.AddHs(remover.StripMol(m))

    # EmbedMolecule returns -1 when no conformer was generated; optimising such
    # a molecule raises, so skip it instead of aborting the whole batch.
    if AllChem.EmbedMolecule(m2, useRandomCoords=True) < 0:
        skipped.append((name, 'embedding failed'))
        continue
    AllChem.MMFFOptimizeMolecule(m2)
    mol_sdf = Chem.MolToMolBlock(m2)

    mol = ob.OBMol()
    if not obConversion.ReadString(mol, mol_sdf):
        skipped.append((name, 'Open Babel could not read the mol block'))
        continue
    # Drop Open Babel's own two header lines and keep only the atom records.
    xyz = obConversion.WriteString(mol).split("\n", 2)[2]
    # Count the atom records that are actually written rather than inferring
    # the count from the character length of the block.
    line = str(sum(1 for record in xyz.splitlines() if record.strip()))
    # The context manager closes each file; the original leaked every handle.
    with open('AZO_TRANS-{}.xyz'.format(name), 'w') as f:
        f.write(line)
        f.write('\n')
        f.write('Molecule ID: {}\n'.format(name))
        f.write(xyz)
        f.write('\n\n\n\n\n')

if skipped:
    print('Skipped {} molecule(s):'.format(len(skipped)), skipped)

In [4]:
# Write one XYZ file per cis-azobenzene: SMILES -> salt-stripped, H-added
# RDKit molecule -> embedded + MMFF-optimised conformer -> Open Babel XYZ text.
obConversion = ob.OBConversion()
obConversion.SetInAndOutFormats("sdf", "xyz")
remover = SaltRemover(defnData="[Cl,Br]")
charges = []  # never filled in this cell; kept in case other cells reference it
skipped = []  # (molecule ID, reason) for every entry that could not be written

# Generate xyz coordinates files.
# Slicing (instead of range(4000)) cannot raise IndexError on a shorter table.
for name, smi in zip(AZO_ID[:4000], AZO_CIS[:4000]):
    # Missing CSV values arrive as non-strings; treat them like a failed parse.
    m = Chem.MolFromSmiles(smi) if isinstance(smi, str) else None
    if m is None:
        skipped.append((name, 'unparsable SMILES'))
        continue
    # Remove salts and add H
    m2 = Chem.AddHs(remover.StripMol(m))

    # EmbedMolecule returns -1 when no conformer was generated; optimising such
    # a molecule raises, so skip it instead of aborting the whole batch.
    if AllChem.EmbedMolecule(m2, useRandomCoords=True) < 0:
        skipped.append((name, 'embedding failed'))
        continue
    AllChem.MMFFOptimizeMolecule(m2)
    mol_sdf = Chem.MolToMolBlock(m2)

    mol = ob.OBMol()
    if not obConversion.ReadString(mol, mol_sdf):
        skipped.append((name, 'Open Babel could not read the mol block'))
        continue
    # Drop Open Babel's own two header lines and keep only the atom records.
    xyz = obConversion.WriteString(mol).split("\n", 2)[2]
    # Count the atom records that are actually written rather than inferring
    # the count from the character length of the block.
    line = str(sum(1 for record in xyz.splitlines() if record.strip()))
    # The context manager closes each file; the original leaked every handle.
    with open('AZO_CIS-{}.xyz'.format(name), 'w') as f:
        f.write(line)
        f.write('\n')
        f.write('Molecule ID: {}\n'.format(name))
        f.write(xyz)
        f.write('\n\n\n\n\n')

if skipped:
    print('Skipped {} molecule(s):'.format(len(skipped)), skipped)

In [17]:
# Collect the entries of `index` whose molecule has a stereoisomer count above 1
# (as reported by RDKit's GetStereoisomerCount), then print them and their number.
# NOTE(review): `index` is not assigned in any cell visible in this notebook;
# unless it is defined elsewhere, this cell raises NameError on a fresh kernel.
# NOTE(review): the loading cell binds AZO_SMI to a pandas DataFrame, for which
# `AZO_SMI[i]` is a column lookup by label, not a per-molecule SMILES lookup.
# Presumably this cell was run when AZO_SMI was a sequence of SMILES - verify.
# NOTE(review): the recorded output lists indices above the recorded
# len(AZO_ID) of 14001, which also points to different kernel state - confirm.
ints = []
for i in index:
    m = Chem.MolFromSmiles(AZO_SMI[i])
    count = GetStereoisomerCount(m)
    if count > 1:
        ints.append(i)
print('Indices of selected molecules with stereoisomers:', ints)
print('Number of selected molecules with stereoisomers:', len(ints))

Indices of selected molecules with stereoisomers: [21858, 19308, 5706]
Number of selected molecules with stereoisomers: 3


## Diarylethenes

In [None]:
# Read the diarylethene isomer table; the first CSV row is used as the header.
DIAR_SMI = pd.read_csv("Diarylethene_Isomers.csv", header=0, sep=",")
# Extract positional columns 1, 2 and 3 as plain Python lists. The later cells
# parse DIAR_TRANS / DIAR_CIS entries as SMILES and use DIAR_ID in file names.
DIAR_TRANS, DIAR_CIS, DIAR_ID = (
    DIAR_SMI.iloc[:, column].tolist() for column in (1, 2, 3)
)

In [8]:
# Number of molecule IDs loaded from the CSV (one list entry per data row).
len(DIAR_ID)

90263

In [12]:
# Write one XYZ file per DIAR_TRANS entry: SMILES -> salt-stripped, H-added
# RDKit molecule -> embedded + MMFF-optimised conformer -> Open Babel XYZ text.
obConversion = ob.OBConversion()
obConversion.SetInAndOutFormats("sdf", "xyz")
remover = SaltRemover(defnData="[Cl,Br]")
charges = []  # never filled in this cell; kept in case other cells reference it
skipped = []  # (molecule ID, reason) for every entry that could not be written

# Generate xyz coordinates files.
# Slicing (instead of range(4000)) cannot raise IndexError on a shorter table.
for name, smi in zip(DIAR_ID[:4000], DIAR_TRANS[:4000]):
    # Missing CSV values arrive as non-strings; treat them like a failed parse.
    m = Chem.MolFromSmiles(smi) if isinstance(smi, str) else None
    if m is None:
        skipped.append((name, 'unparsable SMILES'))
        continue
    # Remove salts and add H
    m2 = Chem.AddHs(remover.StripMol(m))

    # EmbedMolecule returns -1 when no conformer was generated; optimising such
    # a molecule raises, so skip it instead of aborting the whole batch.
    if AllChem.EmbedMolecule(m2, useRandomCoords=True) < 0:
        skipped.append((name, 'embedding failed'))
        continue
    AllChem.MMFFOptimizeMolecule(m2)
    mol_sdf = Chem.MolToMolBlock(m2)

    mol = ob.OBMol()
    if not obConversion.ReadString(mol, mol_sdf):
        skipped.append((name, 'Open Babel could not read the mol block'))
        continue
    # Drop Open Babel's own two header lines and keep only the atom records.
    xyz = obConversion.WriteString(mol).split("\n", 2)[2]
    # Count the atom records that are actually written rather than inferring
    # the count from the character length of the block.
    line = str(sum(1 for record in xyz.splitlines() if record.strip()))
    # The context manager closes each file; the original leaked every handle.
    with open('DIAR_TRANS-{}.xyz'.format(name), 'w') as f:
        f.write(line)
        f.write('\n')
        f.write('Molecule ID: {}\n'.format(name))
        f.write(xyz)
        f.write('\n\n\n\n\n')

if skipped:
    print('Skipped {} molecule(s):'.format(len(skipped)), skipped)

In [12]:
# Write one XYZ file per DIAR_CIS entry: SMILES -> salt-stripped, H-added
# RDKit molecule -> embedded + MMFF-optimised conformer -> Open Babel XYZ text.
obConversion = ob.OBConversion()
obConversion.SetInAndOutFormats("sdf", "xyz")
remover = SaltRemover(defnData="[Cl,Br]")
charges = []  # never filled in this cell; kept in case other cells reference it
skipped = []  # (molecule ID, reason) for every entry that could not be written

# Generate xyz coordinates files.
# Slicing (instead of range(4000)) cannot raise IndexError on a shorter table.
for name, smi in zip(DIAR_ID[:4000], DIAR_CIS[:4000]):
    # Missing CSV values arrive as non-strings; treat them like a failed parse.
    m = Chem.MolFromSmiles(smi) if isinstance(smi, str) else None
    if m is None:
        skipped.append((name, 'unparsable SMILES'))
        continue
    # Remove salts and add H
    m2 = Chem.AddHs(remover.StripMol(m))

    # EmbedMolecule returns -1 when no conformer was generated; optimising such
    # a molecule raises, so skip it instead of aborting the whole batch.
    if AllChem.EmbedMolecule(m2, useRandomCoords=True) < 0:
        skipped.append((name, 'embedding failed'))
        continue
    AllChem.MMFFOptimizeMolecule(m2)
    mol_sdf = Chem.MolToMolBlock(m2)

    mol = ob.OBMol()
    if not obConversion.ReadString(mol, mol_sdf):
        skipped.append((name, 'Open Babel could not read the mol block'))
        continue
    # Drop Open Babel's own two header lines and keep only the atom records.
    xyz = obConversion.WriteString(mol).split("\n", 2)[2]
    # Count the atom records that are actually written rather than inferring
    # the count from the character length of the block.
    line = str(sum(1 for record in xyz.splitlines() if record.strip()))
    # The context manager closes each file; the original leaked every handle.
    with open('DIAR_CIS-{}.xyz'.format(name), 'w') as f:
        f.write(line)
        f.write('\n')
        f.write('Molecule ID: {}\n'.format(name))
        f.write(xyz)
        f.write('\n\n\n\n\n')

if skipped:
    print('Skipped {} molecule(s):'.format(len(skipped)), skipped)

In [14]:
# Collect the entries of `index` whose molecule has a stereoisomer count above 1
# (as reported by RDKit's GetStereoisomerCount), then print them and their number.
# NOTE(review): `index` is not assigned in any cell visible in this notebook;
# unless it is defined elsewhere, this cell raises NameError on a fresh kernel.
# NOTE(review): the loading cell binds DIAR_SMI to a pandas DataFrame, for which
# `DIAR_SMI[i]` is a column lookup by label, not a per-molecule SMILES lookup.
# Presumably this cell was run when DIAR_SMI was a sequence of SMILES - verify.
ints = []
for i in index:
    m = Chem.MolFromSmiles(DIAR_SMI[i])
    count = GetStereoisomerCount(m)
    if count > 1:
        ints.append(i)
print('Indices of selected molecules with stereoisomers:', ints)
print('Number of selected molecules with stereoisomers:', len(ints))

Indices of selected molecules with stereoisomers: [60107, 82767, 49314, 38220]
Number of selected molecules with stereoisomers: 4
