In [1]:
import pandas as pd

# Load the property table and discard its (empty) Dipole column in one step;
# the column is recomputed further down.
df = pd.read_csv('all_properties.csv').drop(columns=['Dipole'])


In [2]:
from rdkit import Chem
from rdkit.Chem import AllChem

def compute_gasteiger_dipole(smiles, random_seed=42):
    """Estimate a molecular dipole moment from Gasteiger point charges.

    The SMILES is parsed, explicit hydrogens are added, one 3D conformer is
    embedded with ETKDG, and the dipole is taken as ``|sum_i q_i * r_i|``
    using Gasteiger partial charges and the embedded coordinates. No
    force-field optimization is run on the geometry.

    Parameters
    ----------
    smiles : str
        SMILES string of the molecule.
    random_seed : int, optional
        Seed passed to the ETKDG embedding parameters so that repeated runs
        produce the same conformer, and therefore the same dipole.

    Returns
    -------
    float or None
        Dipole magnitude in Debye. ``None`` is returned when ``smiles`` is
        not a string, cannot be parsed, or cannot be embedded in 3D.

    Notes
    -----
    NOTE(review): for molecules with a non-zero net charge the point-charge
    sum depends on the coordinate origin, so the value is only a rough
    descriptor there. The result may also be NaN if any atom's Gasteiger
    charge comes back as NaN -- confirm how such rows should be treated.
    """
    import numpy as np

    # Non-string inputs (e.g. the float NaN pandas uses for a missing cell)
    # cannot be parsed; report them as a failed molecule instead of raising.
    if not isinstance(smiles, str):
        return None

    # MolFromSmiles signals an unparsable SMILES by returning None.
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is None:
        return None
    mol = Chem.AddHs(parsed)

    # 1) embed (fast ETKDG), seeded for reproducibility
    params = AllChem.ETKDG()
    params.randomSeed = random_seed
    if AllChem.EmbedMolecule(mol, params) != 0:
        # Anything other than conformer id 0 is treated as a failed embedding.
        return None

    # 2) the embedded geometry is used as-is; a short UFF pre-optimization
    #    (AllChem.UFFOptimizeMolecule) could be inserted here if desired.

    # 3) compute Gasteiger charges (stored per atom as '_GasteigerCharge')
    Chem.rdPartialCharges.ComputeGasteigerCharges(mol)
    coords = mol.GetConformer(0).GetPositions()
    charges = np.array(
        [float(atom.GetProp('_GasteigerCharge')) for atom in mol.GetAtoms()]
    )

    # 4) dipole vector = sum_i q_i * r_i, written as a vector-matrix product
    dipole_vec = charges @ coords

    # 5) magnitude in e*Angstrom, converted to Debye (1 e*Angstrom = 4.8032 D)
    return float(np.linalg.norm(dipole_vec)) * 4.8032


In [None]:
import os
from concurrent.futures import ProcessPoolExecutor, as_completed

import numpy as np

# Inputs, plus a result buffer pre-filled with NaN (one slot per SMILES)
smiles = df['SMILES'].to_list()
dipoles = [np.nan for _ in smiles]

def task(i, smi):
    """Compute the dipole for one SMILES and tag it with its row index.

    Returns the tuple ``(i, value)`` where ``value`` is whatever
    ``compute_gasteiger_dipole(smi)`` returns, so results that complete out
    of order can be written back to the right slot.
    """
    return i, compute_gasteiger_dipole(smi)

# Parallelize across processes: up to 40 workers, but never more than the
# number of CPUs this machine reports (avoids oversubscribing small hosts).
n_workers = min(40, os.cpu_count() or 1)
failed = []  # (row index, exception) for every task that raised

with ProcessPoolExecutor(n_workers) as ex:
    futures = [ex.submit(task, i, smi) for i, smi in enumerate(smiles)]
    # Remember which row each future belongs to, so a failure can still be
    # attributed when the worker never got to return its index.
    row_of = {fut: i for i, fut in enumerate(futures)}
    for fut in as_completed(futures):
        try:
            i, val = fut.result()
        except Exception as exc:
            # One bad molecule must not throw away every result computed so
            # far; its slot simply keeps the NaN placeholder.
            failed.append((row_of[fut], exc))
            continue
        # Store NaN instead of None so the Dipole column stays numeric.
        dipoles[i] = np.nan if val is None else val

if failed:
    first_row, first_exc = failed[0]
    print(f"{len(failed)} of {len(smiles)} molecules raised an error; "
          f"first failure at row {first_row}: {first_exc!r}")

# Attach and save
df['Dipole'] = dipoles
df.to_csv('properties.csv', index=False)
print("Wrote properties.csv")


[13:55:47] UFFTYPER: Unrecognized charge state for atom: 14
[13:55:47] UFFTYPER: Unrecognized charge state for atom: 3
[13:55:47] UFFTYPER: Unrecognized charge state for atom: 4
[13:55:47] UFFTYPER: Unrecognized charge state for atom: 0
[13:55:47] UFFTYPER: Unrecognized charge state for atom: 23
[13:55:47] UFFTYPER: Unrecognized charge state for atom: 1
[13:55:47] UFFTYPER: Unrecognized charge state for atom: 1
[13:55:47] UFFTYPER: Unrecognized charge state for atom: 3
[13:55:47] UFFTYPER: Unrecognized charge state for atom: 3
[13:55:47] UFFTYPER: Unrecognized charge state for atom: 9
[13:55:47] UFFTYPER: Unrecognized charge state for atom: 6
[13:55:47] UFFTYPER: Unrecognized charge state for atom: 11
[13:55:47] UFFTYPER: Unrecognized charge state for atom: 6
[13:55:47] UFFTYPER: Unrecognized charge state for atom: 1
[13:55:47] UFFTYPER: Unrecognized charge state for atom: 19
[13:55:47] UFFTYPER: Unrecognized charge state for atom: 1
[13:55:47] UFFTYPER: Unrecognized charge state for a

Wrote all_properties_with_dipoles.csv
