In [None]:
#Imports
import os
import h5py
from math import sqrt
import numpy as np


import torch
import torchani
from torchani.units import HARTREE_TO_KCALMOL

In [None]:
#Build TorchANI Model

# `device` is referenced when the model is built and again inside the helper
# functions below, so it has to exist before anything else runs on a fresh kernel.
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = torchani.models.ANI2x(periodic_table_index=False).to(device).double()   # Set Model with double precision
# With periodic_table_index=False the model is fed the integer indices produced
# by this converter from the chemical symbols listed here.
species_to_tensor = torchani.utils.ChemicalSymbolsToInts(['H','C','N','O','S','F', 'Cl']) #Species to tensor function  

In [None]:
#Functions

def singlepoint_energy_calc(xyz, typ):
    """
    Perform a single point energy calculation with the notebook-level TorchANI model.

    Parameters:
    -----------
    xyz: array-like of atomic coordinates, expected with shape (1, Na, 3), where Na
         is the number of atoms in the molecule
    typ: sequence of Na chemical symbols (for example ['O', 'H', 'H']); it is
         converted to integer indices with `species_to_tensor` and the leading
         batch axis is added here, giving shape (1, Na)

    Returns:
    --------
    The energy tensor returned by `model` for this structure. Callers in this
    notebook multiply it by HARTREE_TO_KCALMOL, so it is presumably in Hartree
    (confirm against the TorchANI documentation).
    """
    # The model is converted to double precision when it is built, so the
    # coordinates are created as float64 explicitly instead of inheriting whatever
    # precision the input array happens to have.
    # requires_grad is kept so the returned energy can still be differentiated
    # with respect to the coordinates.
    coordinates = torch.tensor(xyz, dtype=torch.float64, requires_grad=True, device=device)
    # species_to_tensor yields one index per atom; unsqueeze adds the batch axis.
    species = species_to_tensor(typ).unsqueeze(0).to(device)
    _, energy = model((species, coordinates))
    return energy

def abs_dif(x,y):
    """
    Return the element-wise absolute difference |x - y|.

    Both arguments are passed to numpy, so scalars, sequences and arrays
    (with numpy broadcasting) are accepted.
    """
    return np.abs(np.subtract(x, y))



In [None]:
class anidataloader(object):
    """ANI data loader class.

    Class for loading data stored with the datapacker class. The HDF5 file is
    opened read-only in the constructor and stays open until `cleanup()` is
    called; the loader can also be used as a context manager, which calls
    `cleanup()` on exit.
    """

    def __init__(self, store_file):
        """Constructor: open `store_file` read-only.

        Raises:
            FileNotFoundError: if `store_file` does not exist.
        """
        if not os.path.exists(store_file):
            raise FileNotFoundError('file ' + store_file + ' not found.')
        self.store = h5py.File(store_file, 'r')

    def __enter__(self):
        """Return the loader itself so it can be used in a `with` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the HDF5 file when the `with` block ends; exceptions propagate."""
        self.cleanup()
        return False

    @staticmethod
    def _read_dataset(node):
        """Read a whole dataset and decode byte strings to `str`.

        Returns a list of ASCII-decoded strings when the data is a non-empty
        array whose first element is a bytes object; otherwise returns the
        numpy array unchanged.
        """
        # `Dataset.value` was removed in h5py 3.0; indexing with an empty tuple
        # reads the full dataset on both old and new h5py versions.
        dataset = np.array(node[()])
        # ndim guard: a scalar dataset becomes a 0-d array that cannot be indexed.
        # isinstance(..., bytes) covers numpy byte strings (np.bytes_ subclasses
        # bytes) as well as plain bytes objects held in an object array.
        if dataset.ndim > 0 and dataset.size != 0 and isinstance(dataset[0], bytes):
            dataset = [a.decode('ascii') for a in dataset]
        return dataset

    def _collect(self, item, path):
        """Build the record dict for one group: its path plus every non-group child."""
        data = {'path': path}
        for k in item.keys():
            node = item[k]
            if not isinstance(node, h5py.Group):
                data[k] = self._read_dataset(node)
        return data

    def h5py_dataset_iterator(self, g, prefix=''):
        """Group recursive iterator.

        Iterates through all groups in all branches below `g` and yields one
        dict per group that holds datasets. Each dict contains the key 'path'
        (built from `prefix` and the group names) plus one entry per dataset.
        """
        for key in g.keys():
            item = g[key]
            path = '{}/{}'.format(prefix, key)
            keys = list(item.keys())
            if not keys:
                # An empty group has nothing to yield and nothing to descend into.
                continue
            if isinstance(item[keys[0]], h5py.Dataset):  # test for dataset
                yield self._collect(item, path)
            else:  # test for group (go down)
                yield from self.h5py_dataset_iterator(item, path)

    def __iter__(self):
        """Default class iterator (iterate through all data)."""
        yield from self.h5py_dataset_iterator(self.store)

    def get_group_list(self):
        """Return a list of all top-level groups in the file."""
        return list(self.store.values())

    def iter_group(self, g):
        """Allow iteration through the data in a given group."""
        yield from self.h5py_dataset_iterator(g)

    def get_data(self, path, prefix=''):
        """Return the requested dataset group as a dict.

        `path` is looked up in the open file; the returned dict has the key
        'path' set to `prefix` + '/' + `path` plus one entry per dataset found
        directly inside that group.
        """
        item = self.store[path]
        return self._collect(item, '{}/{}'.format(prefix, path))

    def group_size(self):
        """Return the number of top-level groups."""
        return len(self.get_group_list())

    def size(self):
        """Return the total number of items found directly inside the top-level groups."""
        return sum(len(g.items()) for g in self.store.values())

    def cleanup(self):
        """Close the HDF5 file."""
        self.store.close()

                                                      

# Calculating Interaction Energies

In [None]:
# Data files: location of the benchmark set and the loader opened on it.

data_in = 'h5_files/Halgren.h5'            # Path to the HDF5 file, relative to the current working directory

adl = anidataloader(data_in)                # Opens the file read-only through anidataloader; it stays open until adl.cleanup() is called

In [None]:
# The loader yields one dict per stored group, so it can be browsed like a
# dictionary. Only the first record is pulled here to list the fields it exposes.
first_record = next(iter(adl), None)
if first_record is not None:
    for field_name in first_record:
        print(field_name)

## Interaction Energy with No Deformation Energy

In [None]:
systems = []                                 # System names, taken from the ANI dimer paths
ani_eAB, ani_eA, ani_eB = [], [], []         # ANI dimer / monomer A / monomer B energies (kcal/mol)
dft_eAB, dft_eA, dft_eB = [], [], []         # DFT dimer / monomer A / monomer B energies as stored in the file (assumed kcal/mol - TODO confirm)

# Each path fragment is paired with the list that receives the matching energy.
# The order of the pairs is the order in which the fragments are tested.
ani_targets = (
    ('/ani/dimers/', ani_eAB),
    ('/ani/monA/', ani_eA),
    ('/ani/monB/', ani_eB),
)
dft_targets = (
    ('/dft/dimers/', dft_eAB),
    ('/dft/monA/', dft_eA),
    ('/dft/monB/', dft_eB),
)

for dat in adl:
    path = dat['path']
    if '/ani/dimers/' in path:
        systems.append(path[12:])            # drop the 12 leading characters, the length of '/ani/dimers/'
    for fragment, bucket in ani_targets:
        if fragment in path:
            # ANI entries: perform a single point calculation and convert to kcal/mol
            energy = singlepoint_energy_calc(dat['coordinates'], dat['species'])
            bucket.append(energy.item()*HARTREE_TO_KCALMOL)
    for fragment, bucket in dft_targets:
        if fragment in path:
            # DFT entries: take the first stored energy value straight from the H5 file
            bucket.append(dat['energy'][0])

In [None]:
# Print every collected energy, one labelled line per quantity and system.
energy_columns = (
    ('ANI AB: ', ani_eAB),
    ('ANI A: \t', ani_eA),
    ('ANI B: \t', ani_eB),
    ('DFT AB: ', dft_eAB),
    ('DFT A: \t', dft_eA),
    ('DFT B: \t', dft_eB),
)
for row, system_name in enumerate(systems):
    print(system_name)
    for label, column in energy_columns:
        print(label, column[row])

In [None]:
# Interaction energy of each system, kept in two parallel lists:
#   IE = E_AB - (E_A + E_B)
n_systems = len(systems)

# ANI interaction energies
ani_int_e = [ani_eAB[k] - (ani_eA[k] + ani_eB[k]) for k in range(n_systems)]
# DFT interaction energies
dft_int_e = [dft_eAB[k] - (dft_eA[k] + dft_eB[k]) for k in range(n_systems)]

In [None]:
# ANI vs DFT: mean absolute error and root-mean-square error of the
# interaction energies.
ani_int_e = np.array(ani_int_e)
dft_int_e = np.array(dft_int_e)
abs_errors = abs_dif(ani_int_e, dft_int_e)   # per-system |ANI - DFT|

print('ANI vs DFT')
print('MAE')
print(np.average(abs_errors))
print('RMSE')
print(sqrt(np.average(abs_errors**2)))

## Interaction Energy with Deformation Energy

In [None]:
systems = []                                 # System names, taken from the ANI dimer paths
ani_eAB, ani_eA, ani_eB = [], [], []         # ANI dimer / monomer A / monomer B energies (kcal/mol)
dft_eAB, dft_eA, dft_eB = [], [], []         # DFT dimer / monomer A / monomer B energies as stored in the file (assumed kcal/mol - TODO confirm)

# Same collection as for the undeformed case, except that the monomer energies
# come from the 'optmonA' / 'optmonB' groups.
# Each path fragment is paired with the list that receives the matching energy.
ani_targets = (
    ('/ani/dimers/', ani_eAB),
    ('/ani/optmonA/', ani_eA),
    ('/ani/optmonB/', ani_eB),
)
dft_targets = (
    ('/dft/dimers/', dft_eAB),
    ('/dft/optmonA/', dft_eA),
    ('/dft/optmonB/', dft_eB),
)

for dat in adl:
    path = dat['path']
    if '/ani/dimers/' in path:
        systems.append(path[12:])            # drop the 12 leading characters, the length of '/ani/dimers/'
    for fragment, bucket in ani_targets:
        if fragment in path:
            # ANI entries: perform a single point calculation and convert to kcal/mol
            energy = singlepoint_energy_calc(dat['coordinates'], dat['species'])
            bucket.append(energy.item()*HARTREE_TO_KCALMOL)
    for fragment, bucket in dft_targets:
        if fragment in path:
            # DFT entries: take the first stored energy value straight from the H5 file
            bucket.append(dat['energy'][0])

In [None]:
# Print every collected energy, one labelled line per quantity and system.
energy_columns = (
    ('ANI AB: ', ani_eAB),
    ('ANI A: \t', ani_eA),
    ('ANI B: \t', ani_eB),
    ('DFT AB: ', dft_eAB),
    ('DFT A: \t', dft_eA),
    ('DFT B: \t', dft_eB),
)
for row, system_name in enumerate(systems):
    print(system_name)
    for label, column in energy_columns:
        print(label, column[row])

In [None]:
# Interaction energy of each system, kept in two parallel lists:
#   IE = E_AB - (E_A + E_B)
n_systems = len(systems)

# ANI interaction energies
ani_int_e = [ani_eAB[k] - (ani_eA[k] + ani_eB[k]) for k in range(n_systems)]
# DFT interaction energies
dft_int_e = [dft_eAB[k] - (dft_eA[k] + dft_eB[k]) for k in range(n_systems)]

In [None]:
# ANI vs DFT: mean absolute error and root-mean-square error of the
# interaction energies.
ani_int_e = np.array(ani_int_e)
dft_int_e = np.array(dft_int_e)
abs_errors = abs_dif(ani_int_e, dft_int_e)   # per-system |ANI - DFT|

print('ANI vs DFT')
print('MAE')
print(np.average(abs_errors))
print('RMSE')
print(sqrt(np.average(abs_errors**2)))