In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem
from aiidalab_widgets_base import StructureManagerWidget
from aiidalab_widgets_base import SmilesWidget
from aiidalab_widgets_base.viewers import StructureDataViewer
from aiidalab_ispg.widgets import TrajectoryDataViewer
from aiida.plugins import DataFactory
# Look up the AiiDA data classes through the plugin factory rather than importing them directly.
StructureData = DataFactory("structure")
# Class used below to hold a sequence of conformers.
TrajectoryData = DataFactory("array.trajectory")

In [None]:
# Detailed documentation
# https://www.rdkit.org/docs/RDKit_Book.html#conformer-generation
# API reference
# https://www.rdkit.org/docs/source/rdkit.Chem.rdDistGeom.html?highlight=embedmultipleconfs#rdkit.Chem.rdDistGeom.EmbedMultipleConfs

# https://sourceforge.net/p/rdkit/mailman/rdkit-discuss/thread/CWLP265MB0818A57240D003F146E910798C680%40CWLP265MB0818.GBRP265.PROD.OUTLOOK.COM/#msg36584689
#

from traitlets import Union, Instance
from ase import Atoms
from aiida.orm import Data
import numpy as np

class ConformerManagerWidget(StructureManagerWidget):
    """Structure manager whose format table also lists ``TrajectoryData``.

    Apart from overriding ``SUPPORTED_DATA_FORMATS`` the class defers entirely
    to ``StructureManagerWidget``.
    """

    # Maps a node class name to the string registered for it (the same strings
    # are passed to DataFactory elsewhere in this notebook).
    SUPPORTED_DATA_FORMATS = dict(
        CifData="cif",
        StructureData="structure",
        TrajectoryData="array.trajectory",
    )

    def __init__(self, **kwargs):
        """Forward all keyword arguments unchanged to the parent widget."""
        super().__init__(**kwargs)

class ConformerWidget(SmilesWidget):
    """SMILES importer that yields a set of conformers instead of one structure.

    The SMILES string is embedded into 3D with RDKit (falling back to the
    parent's ``_pybel_opt`` when RDKit yields nothing), every conformer is
    optimized with xTB through ASE, and the result is packed into a
    ``TrajectoryData`` node with an ``energies`` array.
    """

    # The imported structure may be a single ASE Atoms object or a TrajectoryData node.
    structure = Union([Instance(Atoms), Instance(TrajectoryData)], allow_none=True)

    # TODO: Adjust max number of steps and relax convergence criteria
    # fmax - maximum force per atom for convergence (0.05 default in ASE)
    # maxstep - maximum atom displacement per iteration (angstrom, 0.04 ASE default)
    def _xtb_opt(self, atoms, xtb_method="GFN2-xTB", max_steps=50, fmax=0.1):
        """Optimize one structure in place with an xTB calculator.

        :param atoms: ASE ``Atoms`` object; an XTB calculator is attached to it.
        :param xtb_method: method name passed to the ``XTB`` calculator.
        :param max_steps: maximum number of optimizer iterations.
        :param fmax: force convergence threshold passed to the optimizer.
        :return: the same ``atoms`` object, whether or not the run converged.
        """
        # https://wiki.fysik.dtu.dk/ase/gettingstarted/tut02_h2o_structure/h2o.html
        # https://xtb-python.readthedocs.io/en/latest/general-api.html
        from xtb.ase.calculator import XTB
        from ase.optimize import GPMin

        atoms.calc = XTB(method=xtb_method)
        # Alternative optimizer that was tried: BFGS(atoms, maxstep=0.06, trajectory=None, logfile=None)
        opt = GPMin(atoms, trajectory=None, logfile=None)
        converged = opt.run(steps=max_steps, fmax=fmax)
        if converged:
            print("%s minimization converged in %d iterations" % (xtb_method, opt.get_number_of_steps()))
        else:
            # Report the method name from the argument; there is no `xtb` name in scope here.
            print("%s minimization failed to converged in %d iterations" % (xtb_method, opt.get_number_of_steps()))
        return atoms

    def _mol_from_smiles(self, smiles, steps=1000):
        """Convert SMILES to a trajectory of optimized conformers; try rdkit then pybel.

        :param smiles: SMILES string to convert.
        :param steps: maximum number of force-field iterations for the pre-optimization.
        :return: a ``TrajectoryData`` node, or ``None`` when no structure could be generated.
        """
        struct = None
        try:
            struct = self._rdkit_opt(smiles, steps)
        except ValueError as e:
            # RDKit gave up; show the reason and fall through to the OpenBabel
            # attempt that the error message announces.
            self.output.value += str(e)

        if struct is None:
            try:
                struct = self._pybel_opt(smiles, steps)
            except ValueError as e:
                self.output.value += str(e)
                return None

        if struct is None or len(struct) == 0:
            return None

        # NOTE(review): the inherited _pybel_opt presumably returns a single Atoms
        # object; wrap it so the code below iterates over structures, not atoms.
        if isinstance(struct, Atoms):
            struct = [struct]

        conformers = self.optimize_conformers(struct)
        conformers = self._filter_and_sort_conformers(conformers)
        return self._create_trajectory_node(conformers)

    def optimize_conformers(self, conformers):
        """Conformer optimization with XTB.

        :param conformers: iterable of ASE ``Atoms`` objects.
        :return: list of the optimized structures.
        """
        self.output.value += "<br> Optimizing conformers with xtb"
        opt_structs = []
        for ase_struct in conformers:
            opt_struct = self._xtb_opt(ase_struct)
            if opt_struct is not None:
                opt_structs.append(opt_struct)
        return opt_structs

    def _create_trajectory_node(self, conformers):
        """Pack the conformers into a ``TrajectoryData`` node.

        The SMILES string stored in the first conformer's ``info`` dict is set as
        the ``smiles`` extra, and the potential energy of every conformer is
        stored in the ``energies`` array.

        :param conformers: list of ASE ``Atoms`` objects with calculators attached.
        :return: the new ``TrajectoryData`` node, or ``None`` for empty input.
        """
        if conformers is None or len(conformers) == 0:
            return None

        node_list = [StructureData(ase=conformer) for conformer in conformers]

        traj = TrajectoryData(structurelist=node_list)
        traj.set_extra("smiles", conformers[0].info["smiles"])
        energies = np.fromiter(
            (conf.get_potential_energy() for conf in conformers),
            count=len(conformers),
            dtype=float,
        )
        traj.set_array('energies', energies)
        return traj

    # TODO: Automatically sort conformers and filter out those with high energy
    # Boltzmann criterion: Add conformers until reaching e.g. 95% cumulative Boltzmann population
    def _filter_and_sort_conformers(self, ase_structs):
        """Placeholder: currently returns the conformers unchanged."""
        return ase_structs

    def _rdkit_opt(self, smiles, steps, algo="UFF", num_confs=10):
        """Generate conformers with RDKit and pre-optimize them with a force field.

        :param smiles: SMILES string.
        :param steps: maximum number of UFF iterations.
        :param algo: ``"UFF"`` (several embedded conformers, UFF-optimized),
            ``"UFF-single"`` (one embedded conformer, UFF-optimized) or
            ``"ETKDG"`` (several embedded conformers, no force-field step).
        :param num_confs: number of conformers requested in the multi-conformer modes.
        :return: list of ASE ``Atoms`` objects, or ``None`` when the SMILES cannot
            be parsed or UFF parameters are missing.
        :raises ValueError: for an unknown ``algo`` or when embedding fails.
        """
        from rdkit import Chem
        from rdkit.Chem import AllChem

        if algo not in ("UFF", "UFF-single", "ETKDG"):
            # Fail with a clear message instead of a NameError on `conf_ids` further down.
            raise ValueError("RDKit ERROR: Unknown conformer generation algorithm '%s'" % algo)

        self.output.value += "Using algorithm: %s " % algo

        # NOTE(review): stripping brackets changes the meaning of bracket atoms
        # (charges, isotopes, explicit hydrogens) -- confirm this is intended.
        smiles = smiles.replace("[", "").replace("]", "")
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # RDKit could not parse the SMILES code; report it and return None.
            msg = "RDkit ERROR: Invalid SMILES string"
            self.output.value = msg
            return None

        mol = Chem.AddHs(mol)

        if algo == "UFF-single":
            params = AllChem.ETKDG()
            params.maxAttempts = 20
            params.randomSeed = 42
            conf_id = AllChem.EmbedMolecule(mol, params=params)
            if conf_id == -1:
                # This is a more robust setting for larger molecules, per
                # https://sourceforge.net/p/rdkit/mailman/message/21776083/
                self.output.value += "Embedding failed, retrying with random coordinates."
                params.useRandomCoords = True
                conf_id = AllChem.EmbedMolecule(mol, params=params)
            if conf_id == -1:
                msg = " Failed to generate conformer with RDKit. Trying OpenBabel next."
                raise ValueError(msg)
            if not AllChem.UFFHasAllMoleculeParams(mol):
                msg = "RDKit WARNING: Missing UFF parameters"
                self.output.value += msg
                return None

            AllChem.UFFOptimizeMolecule(mol, maxIters=steps)
            conf_ids = [conf_id]

        elif algo == "UFF":
            params = AllChem.ETKDG()
            params.pruneRmsThresh = 0.1
            params.maxAttempts = 20
            params.randomSeed = 42
            conf_ids = AllChem.EmbedMultipleConfs(mol, numConfs=num_confs, params=params)
            if len(conf_ids) == 0:
                # This is a more robust setting for larger molecules, per
                # https://sourceforge.net/p/rdkit/mailman/message/21776083/
                self.output.value += "Embedding failed, retrying with random coordinates."
                params.useRandomCoords = True
                conf_ids = AllChem.EmbedMultipleConfs(mol, numConfs=num_confs, params=params)
            if len(conf_ids) == 0:
                # An empty id list is how the multi-conformer embedding signals failure.
                msg = " Failed to generate conformer with RDKit. Trying OpenBabel next."
                raise ValueError(msg)

            if not AllChem.UFFHasAllMoleculeParams(mol):
                msg = "RDKit WARNING: Missing UFF parameters"
                self.output.value += msg
                return None

            # https://www.rdkit.org/docs/source/rdkit.Chem.rdForceFieldHelpers.html?highlight=uff#rdkit.Chem.rdForceFieldHelpers.UFFOptimizeMoleculeConfs
            # The per-conformer (not_converged, energy) result is not used here.
            AllChem.UFFOptimizeMoleculeConfs(mol, maxIters=steps, numThreads=1)

        else:  # algo == "ETKDG"
            # https://www.rdkit.org/docs/Cookbook.html?highlight=allchem%20embedmultipleconfs#conformer-generation-with-etkdg
            params = AllChem.ETKDGv2()
            params.pruneRmsThresh = 0.5
            params.randomSeed = 424242
            conf_ids = AllChem.EmbedMultipleConfs(mol, numConfs=num_confs, params=params)

        print("Number of generated conformers = %d" % len(conf_ids))

        # The element list is the same for every conformer, so build it once.
        natoms = mol.GetNumAtoms()
        species = [mol.GetAtomWithIdx(j).GetSymbol() for j in range(natoms)]

        ase_structs = []
        for conf_id in conf_ids:
            positions = mol.GetConformer(id=conf_id).GetPositions()
            ase_structs.append(self._make_ase(species, positions, smiles))
        return ase_structs

In [None]:
# Build the pieces first, then wire them into the manager widget.
smiles_importer = ConformerWidget(title="SMILES")
trajectory_viewer = TrajectoryDataViewer()

# The manager is configured for TrajectoryData nodes and the matching viewer.
structure_manager_widget = ConformerManagerWidget(
    importers=[smiles_importer],
    node_class="TrajectoryData",
    viewer=trajectory_viewer,
)
display(structure_manager_widget)