Purpose:
Gradient-based geometry optimisation that makes two atomic sites similar in their Coulomb matrix (CM) representation.

In [6]:
# dependencies
import os

from rdkit import Chem
from rdkit.Chem import AllChem, ChemicalForceFields
from rdkit.Chem import rdmolfiles

import qml
import ase
import ase.calculators
import ase.calculators.calculator
from ase.optimize import LBFGS

import numpy as np

import matplotlib.pyplot as plt
import pyscf
import pyscf.gto
import pyscf.qmmm
import pyscf.scf
import pyscf.dft
import pyscf.lib

In [80]:
def _distance_bn_nb(charges, positions, idxA, idxB):
    """Distance between the global Coulomb matrices of the two swapped-charge variants.

    Two copies of ``charges`` are made: one with (idxA, idxB) set to (5, 7)
    and one with (7, 5). For each, a row-norm sorted Coulomb matrix is
    generated with ``size=30``; the norm of their difference is returned.

    Args:
        charges: Array of nuclear charges; it is copied, never modified.
        positions: Atomic coordinates handed to qml unchanged.
        idxA, idxB: Indices of the two sites whose charges are overwritten.

    Returns:
        ``np.linalg.norm`` of the difference between the two representations.
    """
    def _swapped_cm(z_a, z_b):
        # Work on a private copy so the caller's charges stay untouched.
        swapped = charges.copy()
        swapped[[idxA, idxB]] = (z_a, z_b)
        return qml.representations.generate_coulomb_matrix(
            swapped, positions, size=30, sorting='row-norm')

    cm_first = _swapped_cm(5, 7)
    cm_second = _swapped_cm(7, 5)
    return np.linalg.norm(cm_first - cm_second)

def distance_bn_nb(charges, positions, idxA, idxB):
    """Distance between the atomic Coulomb matrices of two sites.

    One row-norm sorted atomic Coulomb matrix is generated per atom (cutoffs
    of 3 and decay widths of 1 for both the central and the interaction
    term); the norm of the difference between the entries for ``idxA`` and
    ``idxB`` is returned.

    Args:
        charges: Nuclear charges, passed to qml unchanged.
        positions: Atomic coordinates, passed to qml unchanged.
        idxA, idxB: Indices of the two sites to compare.

    Returns:
        ``np.linalg.norm`` of the difference of the two atomic representations.
    """
    # The former local ``size = 30`` was never handed to qml, so the
    # representation size is (and always was) qml's default.
    atomic = qml.representations.generate_atomic_coulomb_matrix(
        charges, positions, sorting='row-norm',
        interaction_cutoff=3, central_cutoff=3,
        interaction_decay=1, central_decay=1)
    return np.linalg.norm(atomic[idxA] - atomic[idxB])

class CMGapCalc(ase.calculators.calculator.Calculator):
    """ASE calculator combining a damped MMFF energy with a Coulomb-matrix site distance.

    The quantity reported as "energy" is ``E_MMFF / damping + d``, where
    ``E_MMFF`` is the RDKit MMFF energy of the molecule read from ``pdbfile``
    evaluated at the current positions and ``d`` is the value returned by
    ``distance_bn_nb`` for the two selected sites. Forces are the negative
    central finite-difference gradient of that sum.

    Args:
        pdbfile: Path of the PDB file that defines the molecule for the force
            field. Coordinates are assigned to its atoms by index, so the atom
            order has to match the order of the positions passed in.
        charges: Optional nuclear charges passed to ``distance_bn_nb``. When
            omitted, the notebook-level ``c.nuclear_charges`` is looked up at
            evaluation time (the original behaviour).
        idxA, idxB: Indices of the two sites compared by ``distance_bn_nb``;
            the defaults (4, 2) are the values that used to be hard-coded.
    """
    implemented_properties = 'energy forces'.split()

    def __init__(self, pdbfile, charges=None, idxA=4, idxB=2):
        # Let the ASE base class set up its own attributes (atoms, results, ...).
        super().__init__()
        self._pdbfile = pdbfile
        self._charges = charges
        self._idxA = idxA
        self._idxB = idxB
        # Evaluation counter; nothing in this class updates it.
        self._nevals = 0
        # The MMFF energy is divided by this factor before being combined
        # with the site distance.
        self._damping = 50
        # Force-field objects are created lazily on first use and then reused.
        self._ffmol = None
        self._ffprop = None
        self._ff = None

    def _site_charges(self):
        """Return the nuclear charges used for the site-distance term."""
        if self._charges is not None:
            return self._charges
        # Backward-compatible fallback on the notebook global `c`.
        return c.nuclear_charges

    def _force_field(self):
        """Build the MMFF force field once and return the cached instance.

        Raises:
            ValueError: If RDKit cannot read the PDB file or has no MMFF
                parameters for the molecule.
        """
        if self._ff is None:
            mol = Chem.MolFromPDBFile(self._pdbfile, removeHs=False)
            if mol is None:
                raise ValueError('could not read a molecule from %r' % (self._pdbfile,))
            prop = ChemicalForceFields.MMFFGetMoleculeProperties(mol)
            if prop is None:
                raise ValueError('no MMFF parameters available for %r' % (self._pdbfile,))
            self._ffmol = mol
            self._ffprop = prop
            self._ff = ChemicalForceFields.MMFFGetMoleculeForceField(mol, prop)
        return self._ff

    def _positions_of(self, atoms):
        """Return the positions of ``atoms``, falling back to ``self.atoms``."""
        if atoms is None:
            atoms = self.atoms
        if atoms is None:
            raise ValueError('no atoms given and none attached to the calculator')
        return atoms.get_positions()

    def _do_E(self, positions):
        """Evaluate both energy contributions at ``positions``.

        Returns:
            Tuple ``(mmff_energy / damping, site_distance)``.
        """
        ds = distance_bn_nb(self._site_charges(), positions, self._idxA, self._idxB)

        # Add the MMFF term. Every coordinate is overwritten before the energy
        # is evaluated, so reusing one force field across calls carries no
        # state over. As before, the force field is expected to pick up
        # conformer coordinates that are set after its construction.
        ff = self._force_field()
        conf = self._ffmol.GetConformer(0)
        for atom in range(len(positions)):
            conf.SetAtomPosition(atom, positions[atom])
        mmffE = ff.CalcEnergy()

        return mmffE/self._damping, ds

    def _do_Em(self, positions):
        """Return the scalar objective: damped MMFF energy plus site distance."""
        damped_ff, ds = self._do_E(positions)
        return damped_ff + ds

    def get_potential_energy(self, atoms=None, force_consistent=False):
        """Return the objective for ``atoms``; ``force_consistent`` is accepted but unused."""
        return self._do_Em(self._positions_of(atoms))

    def get_forces(self, atoms=None):
        """Return forces as the negative central-difference gradient of the objective."""
        positions = self._positions_of(atoms)
        gradient = np.zeros(positions.shape)
        delta = 0.001
        for dim in range(3):
            for atom in range(len(positions)):
                fwd = positions.copy()
                fwd[atom, dim] += delta
                bwd = positions.copy()
                bwd[atom, dim] -= delta
                gradient[atom, dim] = (self._do_Em(fwd) - self._do_Em(bwd))/(2*delta)
        return -gradient
                

In [81]:
# Load the compound and create two ASE Atoms objects from the same charges and
# coordinates, each with its own CMGapCalc instance.
c = qml.Compound('../../dsgdb9nsd_050720_2.xyz')
atm = ase.Atoms(numbers=c.nuclear_charges, positions=c.coordinates, calculator=CMGapCalc('../../dsgdb9nsd_050720.pdb'))
atmref = ase.Atoms(numbers=c.nuclear_charges, positions=c.coordinates, calculator=CMGapCalc('../../dsgdb9nsd_050720.pdb'))
# Start from an empty trajectory log. A missing file is the only failure that
# is safe to ignore; anything else (e.g. a permission problem) should surface.
try:
    os.unlink('log.xyz')
except FileNotFoundError:
    pass
def logtraj(a=atm):
    """Optimiser hook: print the energy terms and append the geometry to log.xyz.

    Args:
        a: The ASE Atoms object to report on; defaults to the notebook's
            ``atm``. Its calculator must provide ``_do_E``.

    Side effects:
        Prints the tuple returned by ``a.calc._do_E`` and appends one frame
        (atom count, blank comment line, one ``symbol x y z`` line per atom)
        to ``log.xyz``.
    """
    # Read everything from `a` so the hook reports on the object it was given
    # rather than on whatever the globals `atm` / `c` currently refer to.
    pos = a.get_positions()
    symbols = a.get_chemical_symbols()
    print(a.calc._do_E(pos))
    with open('log.xyz', 'a') as fh:
        fh.write('%d\n\n' % len(pos))
        for symbol, (x, y, z) in zip(symbols, pos):
            fh.write('%s %f %f %f\n' % (symbol, x, y, z))
# Optimise `atm` with L-BFGS using its line search; `logtraj` is attached with
# interval=1 so it reports alongside every optimiser step.
# NOTE(review): the captured output below ends in "RuntimeError: LineSearch
# failed!" - confirm whether the partially optimised geometry is what the
# later cells are meant to use.
dyn = LBFGS(atm, use_line_search=True,)
dyn.attach(logtraj, interval=1)
dyn.run(fmax=0.25)

       Step     Time          Energy         fmax
*Force-consistent energies used in optimization.
LBFGS:    0 13:12:32       24.465848*     103.1243
(1.740490347284067, 22.725357638055566)
LBFGS:    1 13:12:32       15.570154*      36.6942
(1.5279814452798925, 14.04217225693431)
LBFGS:    2 13:12:32       14.004051*      32.4305
(1.5073154944636724, 12.49673502788322)
LBFGS:    3 13:12:33        8.756255*      30.3302
(1.3284441588723328, 7.4278108551998585)
LBFGS:    4 13:12:33        7.471236*      16.0422
(1.3052220292964278, 6.1660142310564945)
LBFGS:    5 13:12:33        6.616807*      18.7870
(1.381850524702943, 5.234956055122412)
LBFGS:    6 13:12:34        5.033591*      15.5345
(1.6721129495957348, 3.3614782964699943)
LBFGS:    7 13:12:34        3.491148*      14.5958
(1.9923601184351987, 1.498787797221844)
LBFGS:    8 13:12:35        3.134331*      10.1220
(2.070590374447442, 1.0637405589166313)
LBFGS:    9 13:12:40        3.015481*     658.9367
(2.0507042767715418, 0.964776

RuntimeError: LineSearch failed!

In [70]:
# Display the coordinates currently stored in `atm`.
# NOTE(review): this cell's execution count (70) is lower than the optimisation
# cell's (81), so the output shown may stem from an earlier run - re-run in order.
atm.get_positions()

array([[-0.00519709,  1.38351915,  0.55083979],
       [ 0.01928775,  0.06300246, -0.11756584],
       [ 1.50632371, -0.83750034, -0.23393918],
       [ 1.35535077, -2.02147951, -1.02799277],
       [ 0.22292707, -2.84167351, -0.71294744],
       [-1.25139625, -1.8926082 , -0.64106735],
       [-1.033549  , -0.83263039,  0.39223986],
       [-2.26133058, -0.09573088,  0.64825078],
       [-3.13442351, -0.78326454,  1.5293486 ],
       [ 0.76626226,  1.92707087,  0.15985663],
       [ 2.22633035, -0.18719491, -0.74450142],
       [ 1.89281084, -1.03525653,  0.7829967 ],
       [ 0.08562664, -3.55093445, -1.53499655],
       [ 0.32442581, -3.40552033,  0.23108971],
       [-1.52167343, -1.32278247, -1.61553396],
       [-2.44296293, -2.33402891, -0.39500499],
       [-2.81441818,  0.17929862, -0.25575288]])

In [71]:
def get_electronic_energy(nuclear_charges, coordinates):
    """Return the RHF/6-31G electronic energy for the given nuclei.

    Args:
        nuclear_charges: One nuclear charge per atom.
        coordinates: One coordinate triple per atom, in the same order.

    Returns:
        The total SCF energy minus the nuclear repulsion energy, in the units
        PySCF reports (presumably Hartree).
    """
    # Use the fully qualified module: only `import pyscf.gto` is in scope, so
    # the bare name `gto` raises NameError on a fresh kernel.
    mol = pyscf.gto.Mole(verbose=0)
    mol.build(atom=list(zip(nuclear_charges, coordinates)), basis='631G')
    calc = pyscf.scf.RHF(mol)
    total = calc.scf()

    # Subtract the nuclear repulsion so that only the electronic part remains.
    return total - mol.energy_nuc()

In [79]:
# For each geometry, compare the electronic energies of the two variants in
# which sites 2 and 4 carry the charges (5, 7) and (7, 5) respectively, and
# print the absolute difference.
for atomobj in (atm, ):
    coords = atomobj.get_positions()
    energies = []
    for site_charges in ((5, 7), (7, 5)):
        q = c.nuclear_charges.copy()
        q[[2, 4]] = site_charges
        energies.append(get_electronic_energy(q, coords))
    E1, E2 = energies
    print (abs(E1-E2))

0.7307480791357648


In [None]:
# ref: 0.8064662384433632