# This notebook calculates the minimum conformational energy of each molecule in an input .sdf file and writes a new .sdf file with the molecules sorted by increasing energy of their lowest-energy conformer. It uses the MMFF94 force field implementation in RDKit.

## Module imports below

In [1]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdDistGeom

## Function definitions below

In [2]:
def minConfEnergy(mol, num_confs=50):
    """Find lowest energy (kcal/mol) conformation of the input molecule.

    Adds explicit hydrogens, embeds ``num_confs`` conformers with ETKDGv2,
    minimizes all of them with the MMFF94 force field, and returns the
    lowest energy found among the minimized conformers.

    Parameters
    ----------
    mol : RDKit Mol object
        Molecule to evaluate. It is not modified; the work is done on a
        copy with hydrogens added.
    num_confs : int, optional
        Number of conformers to request from the embedder (default 50).

    Returns
    -------
    float
        Lowest MMFF94 energy (kcal/mol) among the minimized conformers.

    Raises
    ------
    ValueError
        If MMFF94 parameters cannot be assigned to the molecule, or if the
        embedder does not produce any conformer.

    """
    # Explicit hydrogens are needed for a meaningful force-field energy;
    # addCoords=True asks RDKit to give the new hydrogens coordinates when
    # the input molecule already carries a conformer.
    mol_h = Chem.AddHs(mol, addCoords=True)

    # Check force-field support first so unsupported molecules fail fast,
    # before the comparatively expensive embedding step.
    mp = AllChem.MMFFGetMoleculeProperties(mol_h, mmffVariant='MMFF94')
    if mp is None:
        raise ValueError("MMFF94 parameters are not available for this molecule")

    param = rdDistGeom.ETKDGv2()
    cids = list(rdDistGeom.EmbedMultipleConfs(mol_h, num_confs, param))
    if not cids:
        # Without this guard min() below would fail with an opaque
        # "empty sequence" error.
        raise ValueError("conformer embedding failed: no conformers were generated")

    # Minimize every conformer in place; numThreads=0 lets RDKit pick the
    # thread count.
    AllChem.MMFFOptimizeMoleculeConfs(mol_h, numThreads=0, mmffVariant='MMFF94')

    # Evaluate the energy of each minimized conformer and keep the lowest.
    return min(
        AllChem.MMFFGetMoleculeForceField(mol_h, mp, confId=cid).CalcEnergy()
        for cid in cids
    )

## Enter input and output filenames (must be in .sdf format) in cell below

In [None]:
# Path of the .sdf file to read; replace the placeholder before running.
# This value is handed to Chem.SDMolSupplier in the main program cell.
in_file = "<enter input filepath here>"
# Path of the .sdf file to create; replace the placeholder before running.
# This value is handed to Chem.SDWriter in the main program cell.
out_file = "<enter desired output filepath>"

## Main program

In [None]:
# Load the input SD file. The supplier yields None for any record that
# RDKit cannot parse, so every entry is checked before use.
data = Chem.SDMolSupplier(in_file)
total = len(data)

mols = []      # molecules that were processed successfully
energies = []  # lowest conformer energy for each entry in `mols`
for idx, mol in enumerate(data, start=1):
    print(f"Processing molecule {idx} of {total}", end='\r', flush=True)
    if mol is None:
        # Leading newline keeps the message from being overwritten by the
        # carriage-return progress line.
        print(f"\nSkipping record {idx}: could not be parsed")
        continue
    try:
        energy = minConfEnergy(mol)
    except ValueError as err:
        # One problematic molecule should not abort the whole batch.
        print(f"\nSkipping record {idx}: {err}")
        continue
    mols.append(mol)
    energies.append(energy)

# Sort once, on energy only: Mol objects are not orderable, and the stable
# sort keeps input order for molecules with equal energies.
ranked = sorted(zip(energies, mols), key=lambda pair: pair[0])
energies_sorted = [energy for energy, _ in ranked]
mols_sorted = [mol for _, mol in ranked]

# Write the original input molecules (not SMILES-rebuilt copies) so their
# coordinates, names and existing SD properties are kept, tagging each one
# with its computed energy.
writer = Chem.SDWriter(out_file)
try:
    for energy, mol in ranked:
        mol.SetDoubleProp("Energy", energy)
        writer.write(mol)
finally:
    # Always flush and close the output file, even if a write fails.
    writer.close()