In [1]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from tqdm import tqdm
import os

In [4]:
qdf= pd.read_csv('../data/from-smiles/Lipophilicity.csv')
smiles = qdf['smiles']

three_ds = []
failed_three_ds = []
three_ds_path = '../data/from-smiles/Lipophilicity-3d/'
opt_three_ds = []
failed_opt_three_ds = []
opt_three_ds_path = '../data/from-smiles/Lipophilicity-3d-opt/'
two_ds = []
two_ds_path = '../data/from-smiles/Lipophilicity-2d/'
sdf_file = 'Lipophilicity.sdf'
csv_file = 'Lipophilicity.sdf.csv'

target_columns = ['exp']

for smile in tqdm(smiles):
    try:
        mol = Chem.MolFromSmiles(smile)
        mol = Chem.AddHs(mol)
        mol.SetProp('_Name', smile)
        
        AllChem.Compute2DCoords(mol)

        # Save a copy of the 2D molecule
        two_ds.append(mol.__copy__())

        if (AllChem.EmbedMolecule(mol, randomSeed = 0xf00d, maxAttempts = 100) == -1):
            failed_three_ds.append(smile)
            continue
        # Save 3D molecule
        three_ds.append(mol.__copy__())

        if AllChem.MMFFOptimizeMolecule(mol) == -1:
            failed_opt_three_ds.append(smile)
            continue

        # Save optimized 3D molecule
        opt_three_ds.append(mol.__copy__())
    except Exception as e:
        print(e)
        continue

if not os.path.exists(three_ds_path):
    os.makedirs(three_ds_path)

if not os.path.exists(opt_three_ds_path):
    os.makedirs(opt_three_ds_path)

if not os.path.exists(two_ds_path):
    os.makedirs(two_ds_path)

with Chem.SDWriter(two_ds_path + sdf_file) as writer:
    for mol in two_ds:
        writer.write(mol)
qdf.drop(columns=qdf.columns.difference(target_columns), inplace=False).to_csv(two_ds_path + csv_file, index=False)

with Chem.SDWriter(three_ds_path + sdf_file) as writer:
    for mol in three_ds:
        writer.write(mol)
qdf.drop(columns=qdf.columns.difference(target_columns), inplace=False).to_csv(three_ds_path + csv_file, index=False)


with Chem.SDWriter(opt_three_ds_path + sdf_file) as writer:
    for mol in opt_three_ds:
        writer.write(mol)
qdf.drop(columns=qdf.columns.difference(target_columns), inplace=False).to_csv(opt_three_ds_path + csv_file, index=False)


  3%|▎         | 131/4200 [00:07<03:57, 17.14it/s][14:58:03] UFFTYPER: Unrecognized charge state for atom: 1
  6%|▌         | 233/4200 [00:14<05:16, 12.53it/s][14:58:10] UFFTYPER: Unrecognized charge state for atom: 8
  7%|▋         | 287/4200 [00:17<03:27, 18.87it/s][14:58:13] UFFTYPER: Unrecognized charge state for atom: 8
  8%|▊         | 318/4200 [00:19<04:01, 16.09it/s][14:58:15] UFFTYPER: Unrecognized charge state for atom: 8
 13%|█▎        | 533/4200 [00:32<03:25, 17.88it/s][14:58:28] UFFTYPER: Unrecognized charge state for atom: 8
 16%|█▌        | 669/4200 [00:41<03:04, 19.18it/s][14:58:37] UFFTYPER: Unrecognized charge state for atom: 8
 18%|█▊        | 760/4200 [00:47<04:14, 13.52it/s][14:58:43] UFFTYPER: Unrecognized charge state for atom: 8
 20%|██        | 856/4200 [00:53<03:36, 15.46it/s][14:58:49] UFFTYPER: Unrecognized charge state for atom: 2
 25%|██▌       | 1057/4200 [01:06<03:23, 15.45it/s][14:59:02] UFFTYPER: Unrecognized charge state for atom: 23
 28%|██▊       | 

In [5]:
sdf = Chem.SDMolSupplier('data/from-smiles/gdb8-3d/gdb8.sdf')

In [22]:
try:
    mol = Chem.MolFromSmiles('[H]C([H])([H])C12C([H])([H])N(C1([H])[H])C2([H])[H]')
    mol = Chem.AddHs(mol)
    print(AllChem.EmbedMolecule(mol))
    print(AllChem.EmbedMolecule(mol))
except:
    print('Failed to generate conformer for unrecognized molecules')

-1
-1
