In [1]:
#Imports
import os
import h5py
from math import sqrt
import numpy as np


import torch
import torchani
from torchani.units import HARTREE_TO_KCALMOL

In [2]:
# Build the TorchANI model

# Prefer the GPU when CUDA is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# ANI2x model, moved to the selected device and cast to double precision.
model = torchani.models.ANI2x(periodic_table_index=False)
model = model.to(device).double()

# Species-to-tensor function: maps chemical symbols to the integer indices
# passed to the model as `species`.
species_to_tensor = torchani.utils.ChemicalSymbolsToInts(['H','C','N','O','S','F', 'Cl'])

In [3]:
#Functions

def singlepoint_energy_calc(xyz, typ):
    """
    Perform a single point energy calculation with the notebook-level
    TorchANI ``model`` for one molecule.

    Parameters:
    -----------
    xyz: coordinates with shape (1, Na, 3), where Na is the number of atoms in the molecule
    typ: sequence of Na chemical symbols (e.g. ['O', 'H', 'H']); the batch
         dimension is added here, so the model receives species of shape (1, Na)

    Returns:
    --------
    The energy tensor returned by the model. Callers in this notebook read the
    value with ``.item()`` and convert it with HARTREE_TO_KCALMOL.
    """
    # Create the coordinates in the same floating-point precision as the model
    # weights, so input stored in a different precision (e.g. float32) does not
    # clash with a model that was cast with .double().
    model_dtype = next(model.parameters()).dtype
    coordinates = torch.tensor(xyz, dtype=model_dtype, requires_grad=True, device=device)
    # species_to_tensor yields a 1-D index tensor; unsqueeze adds the batch axis.
    species = species_to_tensor(typ).unsqueeze(0).to(device)
    _, energy = model((species, coordinates))
    return energy

def abs_dif(x,y):
    """
    Return the element-wise absolute difference |x - y|.

    x and y may be scalars, sequences or numpy arrays; they are combined with
    numpy's subtract, so the usual broadcasting rules apply.
    """
    return np.absolute(np.subtract(x, y))



In [4]:
class anidataloader(object):
    """ANI data loader class.

    Class for loading data stored with the datapacker class.

    The HDF5 file is opened read-only in the constructor and stays open until
    ``cleanup()`` is called. The loader can also be used as a context manager
    (``with anidataloader(path) as adl: ...``), which closes the file on exit.

    Every record produced by the loader is a dict holding the key ``'path'``
    plus one entry per non-group member of the corresponding HDF5 group.
    """

    def __init__(self, store_file):
        """Constructor: open ``store_file`` for reading.

        Raises FileNotFoundError when ``store_file`` does not exist.
        """
        if not os.path.exists(store_file):
            raise FileNotFoundError('file ' + store_file + ' not found.')
        self.store = h5py.File(store_file, 'r')

    def __enter__(self):
        """Return the loader itself so it can be bound in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the HDF5 file when leaving the ``with`` block."""
        self.cleanup()

    @staticmethod
    def _read_datasets(item, path):
        """Read every non-group member of ``item`` into a dict tagged with ``path``."""
        data = {'path': path}
        for k in item.keys():
            if not isinstance(item[k], h5py.Group):
                # ``Dataset.value`` was removed in h5py 3.0; ``[()]`` reads the
                # whole dataset on both old and new h5py versions.
                dataset = np.array(item[k][()])

                # Decode 1-D arrays of byte strings to a list of str. The ndim
                # check keeps scalar (0-d) datasets from failing on ``[0]``.
                if dataset.ndim > 0 and dataset.size != 0 and isinstance(dataset[0], bytes):
                    dataset = [a.decode('ascii') for a in dataset]

                data.update({k: dataset})
        return data

    def h5py_dataset_iterator(self, g, prefix=''):
        """Group recursive iterator (iterate through all groups in all branches and return datasets in dicts)."""
        for key in g.keys():
            item = g[key]
            path = '{}/{}'.format(prefix, key)
            keys = list(item.keys())
            if not keys:
                # An empty group holds neither datasets nor sub-groups.
                continue
            if isinstance(item[keys[0]], h5py.Dataset):  # test for dataset
                yield self._read_datasets(item, path)
            else:  # test for group (go down)
                yield from self.h5py_dataset_iterator(item, path)

    def __iter__(self):
        """Default class iterator (iterate through all data)."""
        yield from self.h5py_dataset_iterator(self.store)

    def get_group_list(self):
        """Return a list of all groups in the file."""
        return list(self.store.values())

    def iter_group(self, g):
        """Allow iteration through the data in a given group."""
        yield from self.h5py_dataset_iterator(g)

    def get_data(self, path, prefix=''):
        """Return the requested dataset group as a dict (see the class docstring)."""
        return self._read_datasets(self.store[path], '{}/{}'.format(prefix, path))

    def group_size(self):
        """Return the number of groups."""
        return len(self.get_group_list())

    def size(self):
        """Return the number of items in the entire file."""
        return sum(len(g.items()) for g in self.store.values())

    def cleanup(self):
        """Close the HDF5 file."""
        self.store.close()

                                                      

# Calculating Interaction Energies

In [5]:
# Data files:

# Path to the H5 file
data_in = 'h5_files/Halgren.h5'

# Load the H5 file using the AniDataLoader; the file stays open until
# adl.cleanup() is called.
adl = anidataloader(store_file=data_in)

In [6]:
# Records yielded by the loader can be navigated like dictionaries:
# take the first record only and list its keys.
first_record = next(iter(adl), {})
for field in first_record:
    print(field)

path
coordinates
energy
species




## Interaction Energy with No Deformation Energy

In [7]:
# Result lists, filled in the order in which the loader yields the records
systems = []    # system names
ani_eAB = []    # ANI dimer energies (kcal/mol)
ani_eA = []     # ANI monomer A energies (kcal/mol)
ani_eB = []     # ANI monomer B energies (kcal/mol)
dft_eAB = []    # DFT dimer energies (kcal/mol)
dft_eA = []     # DFT monomer A energies (kcal/mol)
dft_eB = []     # DFT monomer B energies (kcal/mol)

# Path fragment -> list receiving the ANI single point energy
ani_targets = (
    ('/ani/dimers/', ani_eAB),
    ('/ani/monA/', ani_eA),
    ('/ani/monB/', ani_eB),
)
# Path fragment -> list receiving the DFT energy stored in the H5 file
dft_targets = (
    ('/dft/dimers/', dft_eAB),
    ('/dft/monA/', dft_eA),
    ('/dft/monB/', dft_eB),
)

for dat in adl:
    path = dat['path']
    for tag, bucket in ani_targets:
        if tag not in path:
            continue
        if bucket is ani_eAB:
            # 12 == len('/ani/dimers/'): keep what follows the first 12 characters
            systems.append(path[12:])
        # Perform the single point calculation and convert Hartree -> kcal/mol
        energy = singlepoint_energy_calc(dat['coordinates'], dat['species'])
        bucket.append(energy.item()*HARTREE_TO_KCALMOL)
    for tag, bucket in dft_targets:
        if tag in path:
            # Extract the DFT energy from the H5 record
            bucket.append(dat['energy'][0])

In [8]:
# Label / value-list pairs, in the order they are printed for every system
report_rows = (
    ('ANI AB: ', ani_eAB),
    ('ANI A: \t', ani_eA),
    ('ANI B: \t', ani_eB),
    ('DFT AB: ', dft_eAB),
    ('DFT A: \t', dft_eA),
    ('DFT B: \t', dft_eB),
)

for idx, name in enumerate(systems):
    print(name)
    for label, values in report_rows:
        print(label, values[idx])

01_HOH_OH2
ANI AB:  -95876.90148359096
ANI A: 	 -47934.334376924155
ANI B: 	 -47934.37657666353
DFT AB:  -95876.95828390002
DFT A: 	 -47934.29710506127
DFT B: 	 -47934.341156226335
03_HOH_OHCH3
ANI AB:  -120533.55017348552
ANI A: 	 -47934.320832016594
ANI B: 	 -72590.1756664719
DFT AB:  -120533.34297399528
DFT A: 	 -47934.30093286907
DFT B: 	 -72590.35724725664
04_CH3OH_OH2
ANI AB:  -120533.25881515798
ANI A: 	 -72590.51867782546
ANI B: 	 -47934.36753394889
DFT AB:  -120533.24200772094
DFT A: 	 -72590.50546499438
DFT B: 	 -47934.33895994318
05_CH3OH_OHCH3
ANI AB:  -145188.80466433548
ANI A: 	 -72590.5190709695
ANI B: 	 -72590.51148114318
DFT AB:  -145189.84532211692
DFT A: 	 -72590.47145398089
DFT B: 	 -72590.41340935456
06_HOH_O(CH3)2
ANI AB:  -145192.32485334962
ANI A: 	 -47934.34656684885
ANI B: 	 -97250.71595461413
DFT AB:  -145193.23895610266
DFT A: 	 -47934.29133197411
DFT B: 	 -97250.56058350432
07_C6H5OH_OH2
ANI AB:  -240830.27173876073
ANI A: 	 -192883.94993510458
ANI B: 	 -47

In [9]:
# Interaction energy of every system, for ANI and for DFT:
# IE = E_AB - (E_A + E_B)

n_systems = len(systems)

# List of ANI interaction energies
ani_int_e = [ani_eAB[k] - (ani_eA[k] + ani_eB[k]) for k in range(n_systems)]
# List of DFT interaction energies
dft_int_e = [dft_eAB[k] - (dft_eA[k] + dft_eB[k]) for k in range(n_systems)]

In [10]:
# ANI vs DFT: mean absolute error and root mean square error
ani_int_e = np.array(ani_int_e)
dft_int_e = np.array(dft_int_e)

# Absolute errors are computed once and reused for both metrics.
abs_err = abs_dif(ani_int_e, dft_int_e)
mae = np.average(abs_err)
rmse = sqrt(np.average(abs_err**2))

for line in ('ANI vs DFT', 'MAE', mae, 'RMSE', rmse):
    print(line)

ANI vs DFT
MAE
1.2426566357970323
RMSE
1.7744918982025237


## Interaction Energy with Deformation Energy

In [11]:
# Result lists, filled in the order in which the loader yields the records
systems = []    # system names
ani_eAB = []    # ANI dimer energies (kcal/mol)
ani_eA = []     # ANI monomer A energies (kcal/mol)
ani_eB = []     # ANI monomer B energies (kcal/mol)
dft_eAB = []    # DFT dimer energies (kcal/mol)
dft_eA = []     # DFT monomer A energies (kcal/mol)
dft_eB = []     # DFT monomer B energies (kcal/mol)

# Path fragment -> list receiving the ANI single point energy.
# Monomer energies are read from the 'optmonA' / 'optmonB' groups here.
ani_targets = (
    ('/ani/dimers/', ani_eAB),
    ('/ani/optmonA/', ani_eA),
    ('/ani/optmonB/', ani_eB),
)
# Path fragment -> list receiving the DFT energy stored in the H5 file
dft_targets = (
    ('/dft/dimers/', dft_eAB),
    ('/dft/optmonA/', dft_eA),
    ('/dft/optmonB/', dft_eB),
)

for dat in adl:
    path = dat['path']
    for tag, bucket in ani_targets:
        if tag not in path:
            continue
        if bucket is ani_eAB:
            # 12 == len('/ani/dimers/'): keep what follows the first 12 characters
            systems.append(path[12:])
        # Perform the single point calculation and convert Hartree -> kcal/mol
        energy = singlepoint_energy_calc(dat['coordinates'], dat['species'])
        bucket.append(energy.item()*HARTREE_TO_KCALMOL)
    for tag, bucket in dft_targets:
        if tag in path:
            # Extract the DFT energy from the H5 record
            bucket.append(dat['energy'][0])

In [12]:
# Label / value-list pairs, in the order they are printed for every system
report_rows = (
    ('ANI AB: ', ani_eAB),
    ('ANI A: \t', ani_eA),
    ('ANI B: \t', ani_eB),
    ('DFT AB: ', dft_eAB),
    ('DFT A: \t', dft_eA),
    ('DFT B: \t', dft_eB),
)

for idx, name in enumerate(systems):
    print(name)
    for label, values in report_rows:
        print(label, values[idx])

01_HOH_OH2
ANI AB:  -95876.90148359096
ANI A: 	 -47934.38261880757
ANI B: 	 -47934.382608929154
DFT AB:  -95876.95828390002
DFT A: 	 -47934.34661555877
DFT B: 	 -47934.346427305914
03_HOH_OHCH3
ANI AB:  -120533.55017348552
ANI A: 	 -47934.38261535262
ANI B: 	 -72590.62650089279
DFT AB:  -120533.34297399528
DFT A: 	 -47934.34661555877
DFT B: 	 -72590.57624806304
04_CH3OH_OH2
ANI AB:  -120533.25881515798
ANI A: 	 -72590.62655767772
ANI B: 	 -47934.38261288357
DFT AB:  -120533.24200772094
DFT A: 	 -72590.5759343083
DFT B: 	 -47934.34661555877
05_CH3OH_OHCH3
ANI AB:  -145188.80466433548
ANI A: 	 -72590.62660859177
ANI B: 	 -72590.6264695402
DFT AB:  -145189.84532211692
DFT A: 	 -72590.57605981019
DFT B: 	 -72590.57624806304
06_HOH_O(CH3)2
ANI AB:  -145192.32485334962
ANI A: 	 -47934.382617695555
ANI B: 	 -97250.84737287807
DFT AB:  -145193.23895610266
DFT A: 	 -47934.34661555877
DFT B: 	 -97250.71859038985
07_C6H5OH_OH2
ANI AB:  -240830.27173876073
ANI A: 	 -192884.126248988
ANI B: 	 -4793

In [13]:
# Interaction energy of every system, for ANI and for DFT:
# IE = E_AB - (E_A + E_B)

n_systems = len(systems)

# List of ANI interaction energies
ani_int_e = [ani_eAB[k] - (ani_eA[k] + ani_eB[k]) for k in range(n_systems)]
# List of DFT interaction energies
dft_int_e = [dft_eAB[k] - (dft_eA[k] + dft_eB[k]) for k in range(n_systems)]

In [14]:
# ANI vs DFT: mean absolute error and root mean square error
ani_int_e = np.array(ani_int_e)
dft_int_e = np.array(dft_int_e)

# Absolute errors are computed once and reused for both metrics.
abs_err = abs_dif(ani_int_e, dft_int_e)
mae = np.average(abs_err)
rmse = sqrt(np.average(abs_err**2))

for line in ('ANI vs DFT', 'MAE', mae, 'RMSE', rmse):
    print(line)

ANI vs DFT
MAE
1.2470259472685548
RMSE
1.8071833865195197
