In [1]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from tqdm import tqdm
import os

In [2]:
qdf= pd.read_csv('../toxcast_data.csv')
smiles = qdf['smiles']

three_ds = []
failed_three_ds = []
three_ds_path = 'data/from-smiles/toxcast-3d/'
opt_three_ds = []
failed_opt_three_ds = []
opt_three_ds_path = 'data/from-smiles/toxcast-3d-opt/'
two_ds = []
two_ds_path = 'data/from-smiles/toxcast-2d/'
sdf_file = 'toxcast.sdf'
csv_file = 'toxcast.sdf.csv'

for smile in tqdm(smiles):
    try:
        mol = Chem.MolFromSmiles(smile)
        mol = Chem.AddHs(mol)
        mol.SetProp('_Name', smile)
        
        AllChem.Compute2DCoords(mol)

        # Save a copy of the 2D molecule
        two_ds.append(mol.__copy__())

        if (AllChem.EmbedMolecule(mol, randomSeed = 0xf00d, maxAttempts = 100) == -1):
            failed_three_ds.append(smile)
            continue
        # Save 3D molecule
        three_ds.append(mol.__copy__())

        if AllChem.MMFFOptimizeMolecule(mol) == -1:
            failed_opt_three_ds.append(smile)
            continue

        # Save optimized 3D molecule
        opt_three_ds.append(mol.__copy__())
    except Exception as e:
        print(e)
        continue

if not os.path.exists(three_ds_path):
    os.makedirs(three_ds_path)

if not os.path.exists(opt_three_ds_path):
    os.makedirs(opt_three_ds_path)

if not os.path.exists(two_ds_path):
    os.makedirs(two_ds_path)

with Chem.SDWriter(two_ds_path + sdf_file) as writer:
    for mol in two_ds:
        writer.write(mol)
qdf.drop(columns=['smiles'], inplace=False).to_csv(two_ds_path + csv_file, index=False)

with Chem.SDWriter(three_ds_path + sdf_file) as writer:
    for mol in three_ds:
        writer.write(mol)
qdf = qdf[~qdf['smiles'].isin(failed_three_ds)]
qdf.drop(columns=['smiles'], inplace=False).to_csv(three_ds_path + csv_file, index=False)

with Chem.SDWriter(opt_three_ds_path + sdf_file) as writer:
    for mol in opt_three_ds:
        writer.write(mol)
qdf = qdf[~qdf['smiles'].isin(failed_opt_three_ds)]
qdf.drop(columns=['smiles'], inplace=False).to_csv(opt_three_ds_path + csv_file, index=False)

  0%|          | 18/8597 [00:00<07:07, 20.09it/s][14:23:39] UFFTYPER: Unrecognized atom type: Ba (0)
[14:23:39] UFFTYPER: Unrecognized charge state for atom: 1
[14:23:39] UFFTYPER: Unrecognized atom type: Nd2+3 (1)
[14:23:39] UFFTYPER: Unrecognized charge state for atom: 1
[14:23:39] UFFTYPER: Unrecognized atom type: In2+3 (1)
[14:23:39] UFFTYPER: Unrecognized atom type: Co3+3 (0)
  1%|          | 46/8597 [00:01<03:25, 41.58it/s][14:23:39] UFFTYPER: Unrecognized atom type: Ca+2 (0)
  1%|          | 88/8597 [00:01<02:09, 65.73it/s][14:23:40] UFFTYPER: Unrecognized atom type: Ni3+2 (0)
[14:23:40] UFFTYPER: Unrecognized charge state for atom: 1
[14:23:40] UFFTYPER: Unrecognized charge state for atom: 0
[14:23:40] UFFTYPER: Unrecognized charge state for atom: 0
[14:23:40] UFFTYPER: Unrecognized atom type: Cd+2 (0)
  1%|          | 98/8597 [00:01<02:00, 70.82it/s][14:23:40] UFFTYPER: Unrecognized atom type: Ca+2 (0)
  2%|▏         | 177/8597 [00:04<03:27, 40.59it/s][14:23:42] UFFTYPER: Unre

Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)


 13%|█▎        | 1143/8597 [00:40<02:56, 42.33it/s][14:24:18] UFFTYPER: Unrecognized charge state for atom: 0
[14:24:18] UFFTYPER: Unrecognized atom type: Gd2+3 (0)
[14:24:20] UFFTYPER: Unrecognized charge state for atom: 1
[14:24:20] UFFTYPER: Unrecognized atom type: Mn2+2 (0)
[14:24:20] UFFTYPER: Unrecognized charge state for atom: 7
 14%|█▍        | 1222/8597 [00:42<02:55, 42.04it/s][14:24:20] UFFTYPER: Unrecognized charge state for atom: 10
 14%|█▍        | 1237/8597 [00:42<02:26, 50.16it/s][14:24:20] UFFTYPER: Unrecognized atom type: Au6+3 (6)
 15%|█▌        | 1298/8597 [00:44<03:48, 31.88it/s][14:24:22] UFFTYPER: Unrecognized charge state for atom: 0
[14:24:22] UFFTYPER: Unrecognized atom type: Gd2+3 (0)
 16%|█▌        | 1383/8597 [00:46<02:48, 42.91it/s][14:24:24] UFFTYPER: Unrecognized atom type: Mn2+2 (0)
 16%|█▌        | 1394/8597 [00:47<03:38, 32.93it/s][14:24:25] UFFTYPER: Unrecognized charge state for atom: 2
 17%|█▋        | 1455/8597 [00:53<08:35, 13.86it/s][14:24:31] UF

Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)


 21%|██        | 1818/8597 [01:12<03:04, 36.83it/s][14:24:50] UFFTYPER: Unrecognized atom type: Ni6+2 (12)
[14:24:52] UFFTYPER: Unrecognized hybridization for atom: 2
[14:24:52] UFFTYPER: Unrecognized atom type: Au+3 (2)
 21%|██▏       | 1847/8597 [01:15<07:22, 15.26it/s][14:24:53] UFFTYPER: Unrecognized atom type: Fe5+2 (7)
 22%|██▏       | 1879/8597 [01:18<05:30, 20.32it/s][14:24:56] Explicit valence for atom # 0 Cl, 2, is greater than permitted
 22%|██▏       | 1886/8597 [01:18<04:14, 26.41it/s]

Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)


[14:24:58] UFFTYPER: Unrecognized charge state for atom: 0
 23%|██▎       | 1966/8597 [01:21<02:47, 39.70it/s][14:24:59] UFFTYPER: Unrecognized charge state for atom: 0
[14:24:59] UFFTYPER: Unrecognized atom type: Zn+2 (0)
 23%|██▎       | 1987/8597 [01:22<03:56, 27.89it/s][14:25:00] UFFTYPER: Unrecognized atom type: In2+3 (1)
 24%|██▎       | 2023/8597 [01:24<07:34, 14.47it/s][14:25:02] UFFTYPER: Unrecognized atom type: Fe2+2 (0)
 24%|██▍       | 2056/8597 [01:26<11:38,  9.36it/s][14:25:05] UFFTYPER: Unrecognized atom type: Cr1+3 (0)
[14:25:05] UFFTYPER: Unrecognized charge state for atom: 0
[14:25:05] UFFTYPER: Unrecognized atom type: Zn+2 (0)
 24%|██▍       | 2073/8597 [01:27<05:26, 19.97it/s][14:25:05] UFFTYPER: Unrecognized atom type: Ca+2 (0)
 24%|██▍       | 2081/8597 [01:27<05:42, 19.04it/s][14:25:06] UFFTYPER: Unrecognized atom type: Ca+2 (0)
 25%|██▌       | 2164/8597 [01:31<04:41, 22.89it/s][14:25:09] UFFTYPER: Unrecognized charge state for atom: 1
[14:25:14] UFFTYPER: Unrec

Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)
Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)


 29%|██▉       | 2481/8597 [01:47<03:29, 29.24it/s][14:25:26] UFFTYPER: Unrecognized atom type: Ca+2 (0)
[14:25:29] UFFTYPER: Unrecognized charge state for atom: 1
 30%|███       | 2594/8597 [01:57<04:13, 23.67it/s][14:25:35] UFFTYPER: Unrecognized charge state for atom: 7
[14:25:40] UFFTYPER: Unrecognized charge state for atom: 1
 31%|███▏      | 2706/8597 [02:03<03:56, 24.95it/s][14:25:41] UFFTYPER: Unrecognized charge state for atom: 3
 33%|███▎      | 2856/8597 [02:09<03:49, 25.07it/s][14:25:47] UFFTYPER: Unrecognized charge state for atom: 4
 34%|███▎      | 2890/8597 [02:10<04:33, 20.85it/s][14:25:49] UFFTYPER: Unrecognized atom type: Sc2+3 (1)
 34%|███▍      | 2907/8597 [02:13<13:00,  7.29it/s][14:25:51] UFFTYPER: Unrecognized atom type: Ca+2 (0)
[14:25:55] UFFTYPER: Unrecognized charge state for atom: 1
 35%|███▍      | 2988/8597 [02:16<02:28, 37.77it/s][14:25:55] UFFTYPER: Unrecognized atom type: Cr3+3 (7)
[14:25:56] UFFTYPER: Unrecognized charge state for atom: 1
[14:25:56] U

Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)


[14:26:53] UFFTYPER: Unrecognized charge state for atom: 1
[14:26:53] UFFTYPER: Unrecognized charge state for atom: 14
 51%|█████     | 4378/8597 [03:23<04:05, 17.19it/s][14:27:01] UFFTYPER: Unrecognized atom type: Au6+3 (6)
[14:27:01] UFFTYPER: Unrecognized atom type: Cr3+3 (4)
[14:27:01] UFFTYPER: Unrecognized atom type: Cr3+3 (4)
[14:27:01] UFFTYPER: Unrecognized atom type: Cr3+3 (4)
[14:27:02] UFFTYPER: Unrecognized charge state for atom: 1
 52%|█████▏    | 4435/8597 [03:25<05:19, 13.03it/s][14:27:03] UFFTYPER: Unrecognized atom type: Ca+2 (0)
 52%|█████▏    | 4476/8597 [03:26<03:13, 21.26it/s][14:27:05] UFFTYPER: Unrecognized hybridization for atom: 1
[14:27:05] UFFTYPER: Unrecognized atom type: Pt+2 (1)
 54%|█████▍    | 4637/8597 [03:34<02:42, 24.32it/s][14:27:12] UFFTYPER: Unrecognized charge state for atom: 14
 56%|█████▌    | 4781/8597 [03:40<01:28, 43.31it/s][14:27:18] SMILES Parse Error: syntax error while parsing: FAIL
[14:27:18] SMILES Parse Error: Failed parsing SMILES 'F

Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)


 57%|█████▋    | 4918/8597 [03:45<04:04, 15.02it/s][14:27:23] UFFTYPER: Unrecognized atom type: Ag5+1 (0)
 59%|█████▉    | 5098/8597 [03:55<05:03, 11.52it/s][14:27:33] SMILES Parse Error: syntax error while parsing: FAIL
[14:27:33] SMILES Parse Error: Failed parsing SMILES 'FAIL' for input: 'FAIL'
 59%|█████▉    | 5100/8597 [03:55<04:40, 12.47it/s]

Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)


 60%|█████▉    | 5151/8597 [03:56<01:19, 43.43it/s][14:27:35] UFFTYPER: Unrecognized atom type: Ca+2 (0)
 61%|██████    | 5202/8597 [04:00<02:35, 21.89it/s][14:27:38] UFFTYPER: Unrecognized atom type: Ba (0)
 61%|██████    | 5250/8597 [04:01<01:12, 46.38it/s][14:27:39] UFFTYPER: Unrecognized atom type: Ca+2 (0)
 62%|██████▏   | 5359/8597 [04:03<01:09, 46.86it/s][14:27:42] UFFTYPER: Unrecognized atom type: Cu5+1 (0)
[14:27:47] UFFTYPER: Unrecognized charge state for atom: 0
 64%|██████▍   | 5498/8597 [04:10<02:22, 21.75it/s][14:27:48] UFFTYPER: Unrecognized charge state for atom: 2
[14:27:48] UFFTYPER: Unrecognized charge state for atom: 2
 64%|██████▍   | 5511/8597 [04:10<01:26, 35.74it/s][14:27:48] UFFTYPER: Unrecognized charge state for atom: 0
[14:27:48] UFFTYPER: Unrecognized atom type: Zn+2 (0)
 64%|██████▍   | 5516/8597 [04:11<04:04, 12.62it/s][14:27:50] UFFTYPER: Unrecognized charge state for atom: 0
[14:27:50] UFFTYPER: Unrecognized atom type: Zn+2 (0)
[14:27:53] UFFTYPER: Unre

Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)


[14:28:13] UFFTYPER: Unrecognized charge state for atom: 0
[14:28:13] UFFTYPER: Unrecognized atom type: Pb3+3 (0)
 70%|███████   | 6041/8597 [04:35<00:48, 52.82it/s][14:28:14] UFFTYPER: Unrecognized atom type: Se2+2 (3)
[14:28:14] UFFTYPER: Unrecognized atom type: Se2+2 (3)
 71%|███████▏  | 6137/8597 [04:39<00:54, 44.88it/s][14:28:18] UFFTYPER: Unrecognized hybridization for atom: 3
[14:28:18] UFFTYPER: Unrecognized atom type: Pt+2 (3)
 72%|███████▏  | 6158/8597 [04:40<01:00, 40.45it/s][14:28:18] SMILES Parse Error: syntax error while parsing: FAIL
[14:28:18] SMILES Parse Error: Failed parsing SMILES 'FAIL' for input: 'FAIL'
 72%|███████▏  | 6165/8597 [04:40<00:56, 43.22it/s]

Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)


 72%|███████▏  | 6212/8597 [04:43<02:18, 17.17it/s][14:28:21] UFFTYPER: Unrecognized atom type: Ca+2 (0)
 73%|███████▎  | 6300/8597 [04:44<00:48, 47.28it/s][14:28:23] UFFTYPER: Unrecognized atom type: Se2+2 (3)
 74%|███████▍  | 6362/8597 [04:46<00:56, 39.39it/s][14:28:25] UFFTYPER: Unrecognized atom type: As2+3 (1)
 74%|███████▍  | 6374/8597 [04:46<00:51, 43.46it/s][14:28:25] UFFTYPER: Unrecognized atom type: Ca+2 (0)
 74%|███████▍  | 6384/8597 [04:47<00:39, 56.55it/s][14:28:25] UFFTYPER: Unrecognized atom type: Mn2+2 (0)
 75%|███████▍  | 6429/8597 [04:48<01:40, 21.58it/s][14:28:27] UFFTYPER: Unrecognized charge state for atom: 1
 76%|███████▌  | 6507/8597 [04:51<01:09, 29.97it/s][14:28:29] UFFTYPER: Unrecognized charge state for atom: 14
 77%|███████▋  | 6604/8597 [04:55<01:09, 28.70it/s][14:28:33] SMILES Parse Error: syntax error while parsing: FAIL
[14:28:33] SMILES Parse Error: Failed parsing SMILES 'FAIL' for input: 'FAIL'


Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)


 77%|███████▋  | 6620/8597 [04:56<01:42, 19.32it/s][14:28:34] SMILES Parse Error: syntax error while parsing: FAIL
[14:28:34] SMILES Parse Error: Failed parsing SMILES 'FAIL' for input: 'FAIL'
 77%|███████▋  | 6626/8597 [04:56<01:17, 25.59it/s][14:28:34] SMILES Parse Error: syntax error while parsing: FAIL
[14:28:34] SMILES Parse Error: Failed parsing SMILES 'FAIL' for input: 'FAIL'
 77%|███████▋  | 6630/8597 [04:56<01:12, 27.31it/s]

Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)
Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)


 78%|███████▊  | 6669/8597 [04:58<01:19, 24.32it/s][14:28:36] UFFTYPER: Unrecognized atom type: Co5+3 (45)
 78%|███████▊  | 6734/8597 [05:00<00:56, 33.04it/s][14:28:38] UFFTYPER: Unrecognized charge state for atom: 4
[14:28:38] UFFTYPER: Unrecognized charge state for atom: 0
 79%|███████▉  | 6814/8597 [05:02<00:40, 43.95it/s][14:28:41] UFFTYPER: Unrecognized charge state for atom: 14
 80%|███████▉  | 6864/8597 [05:05<01:19, 21.88it/s][14:28:44] UFFTYPER: Unrecognized atom type: Co3+3 (0)
 80%|████████  | 6907/8597 [05:08<01:41, 16.57it/s][14:28:46] SMILES Parse Error: syntax error while parsing: FAIL
[14:28:46] SMILES Parse Error: Failed parsing SMILES 'FAIL' for input: 'FAIL'
 80%|████████  | 6913/8597 [05:08<01:15, 22.34it/s]

Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)


 82%|████████▏ | 7014/8597 [05:13<01:09, 22.72it/s][14:28:52] UFFTYPER: Unrecognized charge state for atom: 1
[14:28:52] UFFTYPER: Unrecognized atom type: Se2+2 (1)
[14:28:54] UFFTYPER: Unrecognized charge state for atom: 0
 82%|████████▏ | 7081/8597 [05:17<01:01, 24.71it/s][14:28:56] UFFTYPER: Unrecognized charge state for atom: 1
 83%|████████▎ | 7159/8597 [05:20<00:37, 38.50it/s][14:28:59] UFFTYPER: Unrecognized atom type: Zn1+2 (1)
[14:28:59] UFFTYPER: Unrecognized atom type: Pd6+2 (1)
 84%|████████▎ | 7185/8597 [05:21<00:34, 40.70it/s][14:28:59] SMILES Parse Error: syntax error while parsing: FAIL
[14:28:59] SMILES Parse Error: Failed parsing SMILES 'FAIL' for input: 'FAIL'


Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)


 84%|████████▎ | 7190/8597 [05:21<00:50, 27.80it/s][14:29:00] UFFTYPER: Unrecognized atom type: Cu6+1 (0)
 84%|████████▍ | 7209/8597 [05:22<00:43, 31.94it/s][14:29:00] UFFTYPER: Unrecognized atom type: Fe2+2 (0)
 84%|████████▍ | 7215/8597 [05:22<00:38, 35.70it/s][14:29:00] UFFTYPER: Unrecognized atom type: Ni6+2 (1)
 84%|████████▍ | 7220/8597 [05:22<00:37, 37.16it/s][14:29:00] UFFTYPER: Unrecognized atom type: Fe2+2 (0)
 84%|████████▍ | 7228/8597 [05:24<02:08, 10.69it/s][14:29:02] UFFTYPER: Unrecognized atom type: Ca+2 (0)
 84%|████████▍ | 7255/8597 [05:24<00:56, 23.96it/s][14:29:03] UFFTYPER: Unrecognized atom type: Fe2+2 (0)
[14:29:03] UFFTYPER: Unrecognized charge state for atom: 0
 85%|████████▍ | 7271/8597 [05:25<00:29, 44.33it/s][14:29:03] UFFTYPER: Unrecognized atom type: Cr3+3 (1)
[14:29:03] UFFTYPER: Unrecognized atom type: Cr3+3 (5)
 85%|████████▍ | 7281/8597 [05:25<00:24, 53.39it/s][14:29:03] UFFTYPER: Unrecognized charge state for atom: 1
[14:29:03] UFFTYPER: Unrecognized c

Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)


 86%|████████▌ | 7389/8597 [05:27<00:17, 69.35it/s][14:29:05] UFFTYPER: Unrecognized charge state for atom: 2
 86%|████████▋ | 7425/8597 [05:28<00:22, 53.01it/s][14:29:06] UFFTYPER: Unrecognized atom type: Sr (0)
[14:29:06] UFFTYPER: Unrecognized atom type: Sr (0)
[14:29:06] UFFTYPER: Unrecognized charge state for atom: 1
 87%|████████▋ | 7462/8597 [05:29<00:31, 36.03it/s][14:29:07] UFFTYPER: Unrecognized atom type: Mn2+2 (0)
[14:29:07] UFFTYPER: Unrecognized charge state for atom: 0
[14:29:07] UFFTYPER: Unrecognized atom type: Zn+2 (0)
 88%|████████▊ | 7525/8597 [05:33<01:05, 16.39it/s][14:29:12] UFFTYPER: Unrecognized atom type: Ca+2 (0)
 88%|████████▊ | 7533/8597 [05:34<00:48, 21.76it/s][14:29:12] UFFTYPER: Unrecognized charge state for atom: 14
 88%|████████▊ | 7549/8597 [05:34<00:38, 27.27it/s][14:29:13] UFFTYPER: Unrecognized atom type: Ca+2 (0)
[14:29:22] UFFTYPER: Unrecognized charge state for atom: 1
 91%|█████████▏| 7860/8597 [05:50<00:18, 39.20it/s][14:29:28] SMILES Parse Er

Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)


 92%|█████████▏| 7950/8597 [05:52<00:18, 35.00it/s][14:29:30] SMILES Parse Error: syntax error while parsing: FAIL
[14:29:30] SMILES Parse Error: Failed parsing SMILES 'FAIL' for input: 'FAIL'
 93%|█████████▎| 7954/8597 [05:52<00:18, 34.88it/s]

Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)


 94%|█████████▎| 8046/8597 [05:56<00:29, 18.98it/s][14:29:34] SMILES Parse Error: syntax error while parsing: FAIL
[14:29:34] SMILES Parse Error: Failed parsing SMILES 'FAIL' for input: 'FAIL'
 94%|█████████▎| 8052/8597 [05:56<00:23, 23.54it/s]

Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)


 95%|█████████▌| 8182/8597 [05:59<00:06, 60.55it/s][14:29:37] SMILES Parse Error: syntax error while parsing: FAIL
[14:29:37] SMILES Parse Error: Failed parsing SMILES 'FAIL' for input: 'FAIL'
[14:29:37] UFFTYPER: Unrecognized charge state for atom: 17
 95%|█████████▌| 8189/8597 [05:59<00:07, 52.96it/s]

Python argument types in
    rdkit.Chem.rdmolops.AddHs(NoneType)
did not match C++ signature:
    AddHs(RDKit::ROMol mol, bool explicitOnly=False, bool addCoords=False, boost::python::api::object onlyOnAtoms=None, bool addResidueInfo=False)


 96%|█████████▋| 8276/8597 [06:00<00:05, 58.52it/s][14:29:39] UFFTYPER: Unrecognized hybridization for atom: 1
[14:29:39] UFFTYPER: Unrecognized atom type: Pt+2 (1)
 97%|█████████▋| 8373/8597 [06:02<00:04, 45.02it/s][14:29:40] UFFTYPER: Unrecognized charge state for atom: 11
 98%|█████████▊| 8430/8597 [06:03<00:04, 35.00it/s][14:29:42] UFFTYPER: Unrecognized charge state for atom: 7
 98%|█████████▊| 8444/8597 [06:04<00:03, 43.29it/s][14:29:42] UFFTYPER: Unrecognized charge state for atom: 1
[14:29:48] UFFTYPER: Unrecognized charge state for atom: 1
[14:29:48] UFFTYPER: Unrecognized charge state for atom: 1
[14:29:48] UFFTYPER: Unrecognized charge state for atom: 1
[14:29:48] UFFTYPER: Unrecognized charge state for atom: 14
100%|██████████| 8597/8597 [06:10<00:00, 23.18it/s]


In [5]:
sdf = Chem.SDMolSupplier('data/from-smiles/gdb8-3d/gdb8.sdf')

In [22]:
try:
    mol = Chem.MolFromSmiles('[H]C([H])([H])C12C([H])([H])N(C1([H])[H])C2([H])[H]')
    mol = Chem.AddHs(mol)
    print(AllChem.EmbedMolecule(mol))
    print(AllChem.EmbedMolecule(mol))
except:
    print('Failed to generate conformer for unrecognized molecules')

-1
-1
