# Add other empirical dispersion corrections to PBE and analyze the properties of the dispersion corrections

In [1]:
# Loads stuff
import sys
sys.path.insert(0,"/home/lg3u19/OnePy")
import onetep_v0_1 as op
import pathlib
import pandas as pd
pd.set_option('display.precision',3) 
import copy

In [2]:
# Directory that receives the EmpDisp_*.csv files written by the to_csv
# calls at the end of this notebook.
csv_target = './ProcessedData/EmpiricalDispersion'

In [3]:
# Snapshot labels (stored as strings) for the 5-, 10-, 25- and 50-snapshot sets.
# The two small sets are not in ascending order, so they stay explicit literals.
snapshots_5 = ['24801', '32401', '17201', '9601', '2001']
snapshots_10 = [
    '24801', '32401', '17201', '13401', '21001',
    '28601', '9601', '5801', '36201', '2001',
]
# 25 evenly spaced labels: 2001, 3521, ..., 38481 (stride 1520).
snapshots_25 = list(map(str, range(2001, 38481 + 1, 1520)))
# 50 evenly spaced labels: 2001, 2761, ..., 39241 (stride 760).
snapshots_50 = list(map(str, range(2001, 39241 + 1, 760)))

## Load BindingEnergy DataFrame from csv

In [4]:
# TODO (kept from the original author): load these tables with a proper method.
def _read_binding_csv(csv_path):
    """Read one binding-energy CSV, taking its first column as the row index."""
    return pd.read_csv(csv_path, index_col=0)


# One table per snapshot-set size (5, 10, 25), each in a "corrected" and an
# "uncorrected" variant, as encoded in the file names.
corrected_5 = _read_binding_csv('./ProcessedData/Binding/corrected_5.csv')
uncorrected_5 = _read_binding_csv('./ProcessedData/Binding/uncorrected_5.csv')
corrected_10 = _read_binding_csv('./ProcessedData/Binding/corrected_10.csv')
uncorrected_10 = _read_binding_csv('./ProcessedData/Binding/uncorrected_10.csv')
corrected_25 = _read_binding_csv('./ProcessedData/Binding/corrected_25.csv')
uncorrected_25 = _read_binding_csv('./ProcessedData/Binding/uncorrected_25.csv')

In [5]:
# Display the 25-snapshot "corrected" table as loaded from CSV (notebook cell output).
corrected_25

Unnamed: 0,PBE,VV10,B97M-V,Exp,5_snaps_thesis,MM
catechol,-8.412,-8.683,-8.482,-4.4,-12.2,-4.453
methylphenol,-8.664,-8.366,-7.808,-4.4,-10.1,-7.436
fluoroaniline,-5.726,-5.275,-5.483,-5.5,-6.3,-8.854
hydroxyaniline,-6.365,-7.216,-7.053,0.0,-8.2,-8.116


# Functions to deal with dispersion

In [7]:
def load_disp_from_folder(root, exclude_list=None, dat_file_name='disp.data'):
    """Load one dispersion table per sub-folder of ``root``.

    Every immediate sub-directory of ``root`` whose name does not start with
    one of the entries of ``exclude_list`` must contain a space-separated
    file named ``dat_file_name``; that file is read with ``pd.read_csv`` and
    stored under the sub-directory's name.  Plain files in ``root`` are
    ignored.

    Parameters
    ----------
    root : str or pathlib.Path
        Directory whose sub-folders are scanned.
    exclude_list : container of str, optional
        Leading characters that mark a folder to be skipped.  ``None``
        (the default) means ``(".", "_", ",")``.
    dat_file_name : str
        Name of the data file expected inside every accepted sub-folder.

    Returns
    -------
    dict
        ``{folder name: DataFrame}``, with keys in sorted folder order so the
        result does not depend on the file system's listing order.

    Raises
    ------
    FileNotFoundError
        If an accepted sub-folder does not contain ``dat_file_name``.
    """
    # A None sentinel avoids sharing one mutable default list between calls.
    if exclude_list is None:
        exclude_list = (".", "_", ",")
    root = pathlib.Path(root)
    disp_dict = {}
    # iterdir() yields entries in arbitrary order; sort for reproducibility.
    for folder in sorted(root.iterdir()):
        if folder.is_dir() and folder.name[0] not in exclude_list:
            disp_dict[folder.name] = pd.read_csv(folder / dat_file_name, sep=' ')
    return disp_dict

In [8]:
def process_disp(disp_dict, snapshots):
    """Return the mean net dispersion of every table over a snapshot subset.

    The net dispersion of a snapshot is ``complex - host - ligand``, taken
    from the columns of those names.

    Parameters
    ----------
    disp_dict : dict
        ``{key: DataFrame}``.  Each DataFrame must be indexed by integer
        snapshot label and have ``complex``, ``host`` and ``ligand`` columns.
    snapshots : iterable
        Snapshot labels to average over; each is converted with ``int``.

    Returns
    -------
    dict
        ``{key: mean net dispersion}`` with the same keys as ``disp_dict``.

    Raises
    ------
    KeyError
        If a requested snapshot or one of the three columns is missing.
    """
    # Convert once so that a one-shot iterable is not exhausted by the first key.
    snapshot_index = [int(x) for x in snapshots]
    mean_dict = {}
    for key, table in disp_dict.items():
        # Select only the needed rows and columns; the input table is not
        # modified and unrelated (possibly non-numeric) columns are ignored.
        subset = table.loc[snapshot_index, ['complex', 'host', 'ligand']]
        net_dispersion = subset['complex'] - subset['host'] - subset['ligand']
        mean_dict[key] = net_dispersion.mean()
    return mean_dict

In [9]:
def elstner_dispersion_collection(directories, functional, snapshots=False):
    """Return the mean net Elstner dispersion per ligand.

    For every directory in ``directories`` the ONETEP output files found in
    ``<directory>/<functional>`` are loaded through OnePy
    (``op.load_out_files``).  The loaded objects are keyed
    ``complex_<snapshot>``, ``host_<snapshot>`` and ``ligand_<snapshot>``.
    Their ``empirical_dispersion`` values are converted with
    ``op.hartree_to_kcal_mol`` and combined into the net term
    ``complex - host - ligand``, which ``process_disp`` then averages.

    Parameters
    ----------
    directories : iterable of pathlib.Path
        One directory per ligand.  The part of the directory name before the
        first ``_`` is used as the ligand key of the result.
    functional : str
        Name of the sub-directory (one functional) to read in each directory.
    snapshots : iterable or False, optional
        Snapshot labels to average over.  ``False`` (the default) or ``None``
        averages each ligand over all snapshots loaded for that ligand.

    Returns
    -------
    dict
        ``{ligand: mean net dispersion}``; empty if ``directories`` is empty.

    Notes
    -----
    The previous implementation used the snapshot list of the *last*
    directory for every ligand when ``snapshots`` was ``False`` and failed
    with ``UnboundLocalError`` for an empty ``directories``.  Snapshots are
    now resolved per directory.
    """
    use_all_loaded = snapshots is None or snapshots is False
    mean_disp_elstner = {}
    for outfile_dir_root in directories:
        outfile_dir = outfile_dir_root / functional
        # load a dictionary of onetep objects using OnePy
        object_dict = op.load_out_files(outfile_dir, format_flag=True,
                                        delim='_', split_num=0)
        # snapshot labels present in this directory, de-duplicated in
        # first-seen order (the label is the last '_'-separated key field)
        snapshots_present = list(dict.fromkeys(
            key.split(sep='_')[-1] for key in object_dict))
        # presumably populates `empirical_dispersion` on each object;
        # verify against OnePy
        for onetep_calc in object_dict.values():
            onetep_calc.get_dispersion()
        columns = {'complex': [], 'host': [], 'ligand': [], 'diff': []}
        for snapshot in snapshots_present:
            e_complex = object_dict['complex_' + snapshot].empirical_dispersion
            e_host = object_dict['host_' + snapshot].empirical_dispersion
            e_ligand = object_dict['ligand_' + snapshot].empirical_dispersion
            columns['complex'].append(op.hartree_to_kcal_mol(e_complex))
            columns['host'].append(op.hartree_to_kcal_mol(e_host))
            columns['ligand'].append(op.hartree_to_kcal_mol(e_ligand))
            columns['diff'].append(
                op.hartree_to_kcal_mol(e_complex - e_host - e_ligand))
        # table indexed by integer snapshot label, as process_disp expects
        ligand_table = pd.DataFrame(
            columns,
            index=pd.Index([int(x) for x in snapshots_present], name='snapshot'),
        )
        ligand = str(outfile_dir_root.name).split('_')[0]
        chosen = snapshots_present if use_all_loaded else snapshots
        mean_disp_elstner.update(process_disp({ligand: ligand_table}, chosen))
    return mean_disp_elstner

In [10]:
def d2_d3_dispersion_correction(ligands, snapshots, mean_disp_elstner, root_dir,
                                reference='phenol'):
    """Return mean dispersion corrections of each ligand relative to a reference.

    For every ligand the dispersion tables under ``root_dir/<ligand>`` are
    loaded with ``load_disp_from_folder`` and averaged over ``snapshots``
    with ``process_disp``, giving one mean per sub-folder.  The ligand's
    Elstner mean is added under the key ``'elstner'``.  The reference
    ligand's values are then subtracted from every other ligand.

    Parameters
    ----------
    ligands : sequence of str
        Ligand names; must contain ``reference``.
    snapshots : iterable
        Snapshot labels to average over.
    mean_disp_elstner : mapping
        ``{ligand: mean net Elstner dispersion}``.
    root_dir : str or pathlib.Path
        Directory holding one sub-directory per ligand.  A trailing
        separator is no longer required.
    reference : str, optional
        Ligand that all others are expressed relative to (default
        ``'phenol'``, the previously hard-coded value).

    Returns
    -------
    pandas.DataFrame
        Rows are the dispersion keys (sub-folder names plus ``'elstner'``),
        columns are the ligands except ``reference``.

    Raises
    ------
    KeyError
        If ``reference`` is not in ``ligands`` or a ligand is missing from
        ``mean_disp_elstner``.
    """
    if reference not in ligands:
        raise KeyError(
            "reference ligand %r is not in ligands %r" % (reference, list(ligands)))
    # Path joining instead of string concatenation: no trailing-slash trap.
    root_dir = pathlib.Path(root_dir)
    mean_disp_correction = {}
    for ligand in ligands:
        scheme_means = process_disp(load_disp_from_folder(root_dir / ligand), snapshots)
        scheme_means['elstner'] = mean_disp_elstner[ligand]
        mean_disp_correction[ligand] = scheme_means
    df_mean_disp = pd.DataFrame.from_dict(mean_disp_correction)
    # Subtract the reference column from all remaining columns, row by row.
    return df_mean_disp.drop(columns=reference).sub(df_mean_disp[reference], axis=0)


In [11]:
def edit_pbe_df(df_binding_energies, df_rel_mean_disp,
                disp_list=('bj', 'bjm', 'old', 'zero')):
    """Add one ``PBE_<disp>`` column per empirical dispersion correction.

    Each new column is ``PBE - elstner + <disp>``: the Elstner term is
    subtracted from the ``PBE`` column and the alternative correction added.
    Values are aligned on the ligand labels.

    Parameters
    ----------
    df_binding_energies : pandas.DataFrame
        Binding energies indexed by ligand, with a ``PBE`` column.  It is
        modified in place.
    df_rel_mean_disp : pandas.DataFrame
        Relative mean dispersion terms; rows are dispersion keys (including
        ``'elstner'``), columns are ligands.
    disp_list : iterable of str, optional
        Row labels of ``df_rel_mean_disp`` to turn into columns.

    Returns
    -------
    pandas.DataFrame
        The same ``df_binding_energies`` object, with the columns added.

    Notes
    -----
    NOTE(review): the previous default listed ``'zero'`` twice, which only
    recomputed the same column.  Possibly a fifth scheme (e.g. ``'zerom'``)
    was intended - confirm against the available dispersion folders.
    """
    # A row lookup replaces the repeated transpose()[...] of the whole frame.
    pbe_without_elstner = df_binding_energies['PBE'] - df_rel_mean_disp.loc['elstner']
    for disp in disp_list:
        df_binding_energies['PBE_' + disp] = pbe_without_elstner + df_rel_mean_disp.loc[disp]
    return df_binding_energies
                                                           

## Dispersion, 25 snaps

In [24]:
# Elstner dispersion from the PBE output folders, averaged over the 25-snapshot set.
functionals = 'PBE'
directories = [
    pathlib.Path.cwd() / outfile_folder
    for outfile_folder in (
        'phenol_outfiles',
        'catechol_outfiles',
        'methylphenol_outfiles',
        'fluoroaniline_outfiles',
        'hydroxyaniline_outfiles',
    )
]
mean_disp_elstner_25 = elstner_dispersion_collection(
    directories, functional=functionals, snapshots=snapshots_25
)

In [25]:
# D2/D3 dispersion data, read from the 50-snapshot folder tree; the 25-snapshot
# subset is selected through the `snapshots` argument.
ligands = [
    'fluoroaniline',
    'phenol',
    'methylphenol',
    'catechol',
    'hydroxyaniline',
]
root_dir = 'dispersion/50_snaps/'
df_rel_mean_disp_25 = d2_d3_dispersion_correction(
    ligands=ligands,
    snapshots=snapshots_25,
    mean_disp_elstner=mean_disp_elstner_25,
    root_dir=root_dir,
)

## Dispersion, 10 snaps

In [26]:
# Elstner dispersion from the PBE output folders, averaged over the 10-snapshot set.
functionals = 'PBE'
directories = [
    pathlib.Path.cwd() / outfile_folder
    for outfile_folder in (
        'phenol_outfiles',
        'catechol_outfiles',
        'methylphenol_outfiles',
        'fluoroaniline_outfiles',
        'hydroxyaniline_outfiles',
    )
]
mean_disp_elstner_10 = elstner_dispersion_collection(
    directories, functional=functionals, snapshots=snapshots_10
)

In [27]:
# D2/D3 dispersion data, read from the 50-snapshot folder tree; the 10-snapshot
# subset is selected through the `snapshots` argument.
ligands = [
    'fluoroaniline',
    'phenol',
    'methylphenol',
    'catechol',
    'hydroxyaniline',
]
root_dir = 'dispersion/50_snaps/'
df_rel_mean_disp_10 = d2_d3_dispersion_correction(
    ligands=ligands,
    snapshots=snapshots_10,
    mean_disp_elstner=mean_disp_elstner_10,
    root_dir=root_dir,
)

## Dispersion, 5 snaps

In [28]:
# Elstner dispersion from the PBE output folders, averaged over the 5-snapshot set.
functionals = 'PBE'
directories = [
    pathlib.Path.cwd() / outfile_folder
    for outfile_folder in (
        'phenol_outfiles',
        'catechol_outfiles',
        'methylphenol_outfiles',
        'fluoroaniline_outfiles',
        'hydroxyaniline_outfiles',
    )
]
mean_disp_elstner_5 = elstner_dispersion_collection(
    directories, functional=functionals, snapshots=snapshots_5
)

In [29]:
# D2/D3 dispersion data, read from the 50-snapshot folder tree; the 5-snapshot
# subset is selected through the `snapshots` argument.
ligands = [
    'fluoroaniline',
    'phenol',
    'methylphenol',
    'catechol',
    'hydroxyaniline',
]
root_dir = 'dispersion/50_snaps/'
df_rel_mean_disp_5 = d2_d3_dispersion_correction(
    ligands=ligands,
    snapshots=snapshots_5,
    mean_disp_elstner=mean_disp_elstner_5,
    root_dir=root_dir,
)

# Construct new DataFrames and save to csv

In [30]:
# 10 snapshots, corrected: add the PBE_<disp> columns, then export to CSV.
out_path = csv_target+'/EmpDisp_corrected_10.csv'
corrected_10 = edit_pbe_df(
    df_binding_energies=corrected_10,
    df_rel_mean_disp=df_rel_mean_disp_10,
)
corrected_10.to_csv(out_path)

In [31]:
# 5 snapshots, corrected: add the PBE_<disp> columns, then export to CSV.
out_path = csv_target+'/EmpDisp_corrected_5.csv'
corrected_5 = edit_pbe_df(
    df_binding_energies=corrected_5,
    df_rel_mean_disp=df_rel_mean_disp_5,
)
corrected_5.to_csv(out_path)

In [32]:
# 10 snapshots, uncorrected: add the PBE_<disp> columns, then export to CSV.
out_path = csv_target+'/EmpDisp_uncorrected_10.csv'
uncorrected_10 = edit_pbe_df(
    df_binding_energies=uncorrected_10,
    df_rel_mean_disp=df_rel_mean_disp_10,
)
uncorrected_10.to_csv(out_path)

In [33]:
# 5 snapshots, uncorrected: add the PBE_<disp> columns, then export to CSV.
out_path = csv_target+'/EmpDisp_uncorrected_5.csv'
uncorrected_5 = edit_pbe_df(
    df_binding_energies=uncorrected_5,
    df_rel_mean_disp=df_rel_mean_disp_5,
)
uncorrected_5.to_csv(out_path)

In [38]:
# 25 snapshots, uncorrected: add the PBE_<disp> columns, then export to CSV.
out_path = csv_target+'/EmpDisp_uncorrected_25.csv'
uncorrected_25 = edit_pbe_df(
    df_binding_energies=uncorrected_25,
    df_rel_mean_disp=df_rel_mean_disp_25,
)
uncorrected_25.to_csv(out_path)

In [39]:
# 25 snapshots, corrected: add the PBE_<disp> columns, then export to CSV.
out_path = csv_target+'/EmpDisp_corrected_25.csv'
corrected_25 = edit_pbe_df(
    df_binding_energies=corrected_25,
    df_rel_mean_disp=df_rel_mean_disp_25,
)
corrected_25.to_csv(out_path)

In [36]:
# Display the 25-snapshot corrected table including the added PBE_* columns.
# NOTE(review): this cell's execution counter (36) is lower than that of the
# cell that adds the columns (39); the output may stem from an earlier run.
corrected_25

Unnamed: 0,PBE,VV10,B97M-V,Exp,5_snaps_thesis,MM,PBE_bj,PBE_bjm,PBE_old,PBE_zero
catechol,-8.412,-8.683,-8.482,-4.4,-12.2,-4.453,-7.381,-7.377,-7.341,-7.409
methylphenol,-8.664,-8.366,-7.808,-4.4,-10.1,-7.436,-6.804,-6.801,-6.729,-6.804
fluoroaniline,-5.726,-5.275,-5.483,-5.5,-6.3,-8.854,-5.748,-5.751,-5.796,-5.734
hydroxyaniline,-6.365,-7.216,-7.053,0.0,-8.2,-8.116,-3.565,-3.551,-3.407,-3.585


In [37]:
# Display the 25-snapshot uncorrected table including the added PBE_* columns.
# NOTE(review): this cell's execution counter (37) is lower than that of the
# cell that adds the columns (38); the output may stem from an earlier run.
uncorrected_25

Unnamed: 0,PBE,VV10,B97M-V,Exp,5_snaps_thesis,MM,PBE_bj,PBE_bjm,PBE_old,PBE_zero
catechol,-10.281,-10.702,-10.404,-4.4,-12.2,-4.462,-9.25,-9.246,-9.21,-9.278
methylphenol,-9.016,-9.387,-8.79,-4.4,-10.1,-7.611,-7.156,-7.153,-7.081,-7.156
fluoroaniline,-4.952,-4.926,-4.984,-5.5,-6.3,-8.975,-4.974,-4.978,-5.022,-4.961
hydroxyaniline,-8.064,-8.934,-8.763,0.0,-8.2,-8.156,-5.264,-5.251,-5.106,-5.285


In [20]:
# Display the 10-snapshot corrected table including the added PBE_* columns.
# NOTE(review): this cell's execution counter (20) is lower than that of the
# cell that adds the columns (30); the output may stem from an earlier run.
corrected_10

Unnamed: 0,PBE,VV10,B97M-V,Exp,5_snaps_thesis,MM,PBE_bj,PBE_bjm,PBE_old,PBE_zero
catechol,-9.678,-10.045,-9.664,-4.4,-12.2,-4.276,-9.031,-9.029,-9.022,-9.066
methylphenol,-6.327,-6.721,-5.714,-4.4,-10.1,-6.524,-3.851,-3.843,-3.728,-3.859
fluoroaniline,-5.467,-4.742,-5.446,-5.5,-6.3,-10.032,-4.852,-4.858,-4.915,-4.824
hydroxyaniline,-5.351,-5.939,-5.983,0.0,-8.2,-6.579,-2.143,-2.132,-1.998,-2.151


In [21]:
# Display the 10-snapshot uncorrected table including the added PBE_* columns.
# NOTE(review): this cell's execution counter (21) is lower than that of the
# cell that adds the columns (32); the output may stem from an earlier run.
uncorrected_10

Unnamed: 0,PBE,VV10,B97M-V,Exp,5_snaps_thesis,MM,PBE_bj,PBE_bjm,PBE_old,PBE_zero
catechol,-10.769,-11.285,-10.848,-4.4,-12.2,-4.332,-10.122,-10.119,-10.113,-10.157
methylphenol,-8.713,-9.628,-8.649,-4.4,-10.1,-6.624,-6.237,-6.229,-6.114,-6.245
fluoroaniline,-6.692,-6.217,-6.867,-5.5,-6.3,-10.128,-6.077,-6.084,-6.14,-6.05
hydroxyaniline,-8.04,-8.558,-8.719,0.0,-8.2,-6.619,-4.832,-4.821,-4.687,-4.84


In [22]:
# Display the 5-snapshot corrected table including the added PBE_* columns.
# NOTE(review): this cell's execution counter (22) is lower than that of the
# cell that adds the columns (31); the output may stem from an earlier run.
corrected_5

Unnamed: 0,PBE,VV10,B97M-V,Exp,5_snaps_thesis,MM,PBE_bj,PBE_bjm,PBE_old,PBE_zero
catechol,-8.41,-9.174,-8.911,-4.4,-12.2,-5.086,-6.913,-6.909,-6.876,-6.947
methylphenol,-5.72,-6.524,-5.588,-4.4,-10.1,-6.994,-2.114,-2.102,-1.939,-2.128
fluoroaniline,-5.596,-4.939,-5.447,-5.5,-6.3,-10.128,-5.126,-5.132,-5.19,-5.099
hydroxyaniline,-4.309,-5.78,-6.227,0.0,-8.2,-8.166,-0.089,-0.071,0.132,-0.101


In [23]:
# Display the 5-snapshot uncorrected table including the added PBE_* columns.
# NOTE(review): this cell's execution counter (23) is lower than that of the
# cell that adds the columns (33); the output may stem from an earlier run.
uncorrected_5

Unnamed: 0,PBE,VV10,B97M-V,Exp,5_snaps_thesis,MM,PBE_bj,PBE_bjm,PBE_old,PBE_zero
catechol,-11.517,-12.346,-12.182,-4.4,-12.2,-5.118,-10.021,-10.017,-9.984,-10.055
methylphenol,-9.704,-10.891,-9.961,-4.4,-10.1,-7.05,-6.098,-6.085,-5.923,-6.111
fluoroaniline,-6.475,-6.038,-6.484,-5.5,-6.3,-10.236,-6.004,-6.011,-6.068,-5.978
hydroxyaniline,-8.257,-9.507,-10.032,0.0,-8.2,-8.162,-4.037,-4.019,-3.816,-4.049
