In [2]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem

In [6]:
import os

from tqdm import tqdm

# Load the QM9 table. The 'smiles' column drives structure generation; the
# remaining columns are written out beside each SDF file.
qdf = pd.read_csv('../Raw_MoleculeNet/qm9/raw/qm9.csv')
smiles = qdf['smiles']

# Per-stage results. Each *_rows list stores the positional row index (into
# qdf) of every molecule appended to the matching molecule list, so the CSV
# written next to an SDF always has exactly one row per SDF record, in order.
# Each failed_* list collects the SMILES that did not make it through a stage.
two_ds = []
two_ds_rows = []
failed_two_ds = []
two_ds_path = 'data/from-smiles/gdb9-2d/'
three_ds = []
three_ds_rows = []
failed_three_ds = []
three_ds_path = 'data/from-smiles/gdb9-3d/'
opt_three_ds = []
opt_three_ds_rows = []
failed_opt_three_ds = []
opt_three_ds_path = 'data/from-smiles/gdb9-3d-opt/'
sdf_file = 'gdb9.sdf'
csv_file = 'gdb9.sdf.csv'

# Routes an unexpected exception to the failure list of the stage it hit.
failed_by_stage = {
    '2d': failed_two_ds,
    '3d': failed_three_ds,
    '3d-opt': failed_opt_three_ds,
}

for row, smile in enumerate(tqdm(smiles)):
    stage = '2d'
    try:
        mol = Chem.MolFromSmiles(smile)
        if mol is None:
            # RDKit returns None (rather than raising) for unparsable SMILES.
            failed_two_ds.append(smile)
            continue
        mol = Chem.AddHs(mol)
        mol.SetProp('_Name', smile)
        AllChem.Compute2DCoords(mol)

        # Save a copy of the 2D molecule before embedding replaces its coordinates.
        two_ds.append(Chem.Mol(mol))
        two_ds_rows.append(row)

        stage = '3d'
        # Fixed seed keeps the embedding reproducible; -1 means no conformer was found.
        if AllChem.EmbedMolecule(mol, randomSeed=0xf00d, maxAttempts=100) == -1:
            failed_three_ds.append(smile)
            continue
        # Save the embedded (unoptimized) 3D molecule.
        three_ds.append(Chem.Mol(mol))
        three_ds_rows.append(row)

        stage = '3d-opt'
        # -1 means the MMFF force field could not be set up for this molecule.
        # A return of 1 (not converged within the iteration limit) is still
        # accepted, matching the previous behaviour of this cell.
        if AllChem.MMFFOptimizeMolecule(mol) == -1:
            failed_opt_three_ds.append(smile)
            continue

        # Save the optimized 3D molecule.
        opt_three_ds.append(Chem.Mol(mol))
        opt_three_ds_rows.append(row)
    except Exception as e:
        # Record the failure instead of only printing it. The row index was
        # not appended for this stage, so the CSV stays aligned with the SDF.
        print(f'{stage} stage failed for {smile!r}: {e}')
        failed_by_stage[stage].append(smile)
        continue


def _write_stage(out_dir, mols, rows):
    """Write one stage's molecules to SDF and the matching qdf rows to CSV.

    out_dir: output directory (created if missing).
    mols:    molecules to write, in order.
    rows:    positional indices into qdf, one per molecule in ``mols``.
    """
    assert len(mols) == len(rows), 'SDF records and CSV rows must match one-to-one'
    os.makedirs(out_dir, exist_ok=True)
    with Chem.SDWriter(os.path.join(out_dir, sdf_file)) as writer:
        for stage_mol in mols:
            writer.write(stage_mol)
    # qdf itself is left untouched; each stage gets its own row subset.
    qdf.iloc[rows].drop(columns=['smiles']).to_csv(
        os.path.join(out_dir, csv_file), index=False
    )


_write_stage(two_ds_path, two_ds, two_ds_rows)
_write_stage(three_ds_path, three_ds, three_ds_rows)
_write_stage(opt_three_ds_path, opt_three_ds, opt_three_ds_rows)

print(
    f'2D: {len(two_ds)} ok / {len(failed_two_ds)} failed; '
    f'3D: {len(three_ds)} ok / {len(failed_three_ds)} failed; '
    f'3D-opt: {len(opt_three_ds)} ok / {len(failed_opt_three_ds)} failed'
)

100%|██████████| 133885/133885 [14:29<00:00, 154.03it/s]


In [5]:
# Open an SDF supplier over the GDB8 3D structure file.
# NOTE(review): this path points at gdb8, not the gdb9 output written by the
# generation cell — presumably from a separate run; confirm it is intended.
sdf = Chem.SDMolSupplier(
    'data/from-smiles/gdb8-3d/gdb8.sdf'
)

In [22]:
# Probe a single SMILES (written with explicit hydrogens) to see whether
# RDKit can embed it in 3D. EmbedMolecule returns -1 when no conformer is
# generated; it is deliberately called twice to show that a plain retry with
# default settings does not change the result.
try:
    mol = Chem.MolFromSmiles('[H]C([H])([H])C12C([H])([H])N(C1([H])[H])C2([H])[H]')
    mol = Chem.AddHs(mol)
    print(AllChem.EmbedMolecule(mol))
    print(AllChem.EmbedMolecule(mol))
except Exception:
    # Narrowed from a bare `except:` so Ctrl-C (KeyboardInterrupt) and
    # SystemExit still propagate; an unparsable SMILES (mol is None) ends up here.
    print('Failed to generate conformer for unrecognized molecules')

-1
-1
