# Entropy Corrections to Binding Free Energies

In [1]:
# Loads stuff
import sys
sys.path.insert(0,"/home/lg3u19/OnePy")
import onetep_v0_1 as op
import pathlib
import pandas as pd
pd.set_option('display.precision',3) 
import copy

In [2]:
# Input directory: EmpDisp binding-energy CSV files are read from here.
csv_source_path = './ProcessedData/EmpiricalDispersion'
# Output directory: entropy-adjusted CSV files are written here.
csv_target_path = './ProcessedData/Entropy'

In [3]:
# Snapshot labels (as strings) for each subset size.
# The 5- and 10-snapshot subsets are listed explicitly because their order is
# not monotonic; that order is kept as-is.
snapshots_5 = ['24801', '32401', '17201', '9601', '2001']
snapshots_10 = [
    '24801', '32401', '17201', '13401', '21001',
    '28601', '9601', '5801', '36201', '2001',
]
# The 25- and 50-snapshot subsets are evenly spaced, starting at 2001:
# every 1520 up to 38481, and every 760 up to 39241, respectively.
snapshots_25 = [str(label) for label in range(2001, 38481 + 1, 1520)]
snapshots_50 = [str(label) for label in range(2001, 39241 + 1, 760)]

# Collect entropy data

In [4]:
# Build {ligand name: DataFrame of numeric 'S' values indexed by snapshot label}.
entropy_dict = {}
for ligand in ['phenol', 'methylphenol', 'catechol', 'fluoroaniline', 'hydroxyaniline']:
    # Space-delimited file with two unnamed columns: snapshot label and S.
    entropy_data = pd.read_csv('entropy/{}_entropy.txt'.format(ligand),
                               delimiter=' ', names=['snapshot', 'S'])
    # Keep only the second dot-separated field of the label and index by it.
    snapshot_ids = entropy_data['snapshot'].str.split('.').str[1]
    entropy_data = entropy_data.assign(snapshot=snapshot_ids).set_index('snapshot')
    # Remove leading/trailing 'a' characters from the raw S strings
    # (presumably a marker in the raw files -- TODO confirm) before converting.
    entropy_data['S'] = entropy_data['S'].str.strip('a')
    entropy_dict[ligand] = entropy_data.apply(pd.to_numeric)
# Rebind the scratch name so it no longer refers to the last ligand's table.
entropy_data = False

In [5]:
def entropy_mean(entropy_data,snapshots):
    """Return the mean of column 'S' over a subset of snapshots.

    entropy_data -- DataFrame indexed by snapshot label with a column 'S'
    snapshots    -- iterable of snapshot labels to average over

    The subset is selected with ``reindex`` rather than ``.loc``, so labels
    that are absent from the index do not raise: they show up as NaN and are
    skipped by ``mean``. If none of the labels is present the result is NaN.
    """
    selected_entropies = entropy_data['S'].reindex(snapshots)
    return selected_entropies.mean()

In [6]:
def entropy_relative(entropy_dict,snapshots,ligands,reference='phenol'):
    """Return the mean entropy of each ligand relative to a reference ligand.

    entropy_dict -- mapping of ligand name to the DataFrame passed on to
                    ``entropy_mean``
    snapshots    -- snapshot labels over which each mean is taken
    ligands      -- ligand names to report; ``reference`` itself is skipped
    reference    -- ligand whose mean is subtracted from every other mean

    Returns a pandas Series indexed by ligand name (reference excluded) that
    holds ``mean(ligand) - mean(reference)``.

    The reference mean is read from ``entropy_dict`` directly, so the
    reference does not have to appear in ``ligands``. Raises KeyError if a
    needed ligand (or the reference) is missing from ``entropy_dict``.
    """
    others = [ligand for ligand in ligands if ligand != reference]
    relative_means = {}
    # Only touch the reference when there is something to compare it with,
    # so an empty request still yields an empty Series.
    if others:
        reference_mean = entropy_mean(entropy_dict[reference], snapshots)
        relative_means = {
            ligand: entropy_mean(entropy_dict[ligand], snapshots) - reference_mean
            for ligand in others
        }
    return pd.Series(relative_means)

In [7]:
# Ligands to compare; entropy_relative's default reference ('phenol') is among them.
ligands = ['phenol', 'methylphenol', 'catechol', 'fluoroaniline', 'hydroxyaniline']
# One Series of relative mean entropies per snapshot subset (5, 10, 25, 50).
# Labels are passed through str() so they match the string snapshot index.
entropy_5, entropy_10, entropy_25, entropy_50 = (
    entropy_relative(entropy_dict, list(map(str, subset)), ligands)
    for subset in (snapshots_5, snapshots_10, snapshots_25, snapshots_50)
)


In [8]:
# Stack the four relative-entropy Series as rows, label each row with its
# snapshot count, then flip so ligands are rows and snapshot counts are columns.
entropy_df = (
    pd.DataFrame([entropy_5, entropy_10, entropy_25, entropy_50])
    .rename(index={0: 5, 1: 10, 2: 25, 3: 50})
    .transpose()
)
entropy_df

Unnamed: 0,5,10,25,50
methylphenol,1.082,0.472,-1.198,-1.553
catechol,1.03,1.169,0.058,-0.396
fluoroaniline,2.853,1.773,-0.647,-1.155
hydroxyaniline,2.282,2.168,0.137,-1.013


## Load the EmpDisp CSV files (which include binding free energies), apply the entropy correction, and save to CSV
TODO: decide which entropies to apply and which CSV datasets to load and save.

In [9]:
# Read the EmpDisp binding free energy tables (corrected and uncorrected) for
# the 5-, 10- and 25-snapshot sets; the first CSV column becomes the index.
(corrected_5, uncorrected_5,
 corrected_10, uncorrected_10,
 corrected_25, uncorrected_25) = (
    pd.read_csv('{}/EmpDisp_{}_{}.csv'.format(csv_source_path, label, num_of_snaps),
                index_col=0)
    for num_of_snaps in (5, 10, 25)
    for label in ('corrected', 'uncorrected')
)


Apply the entropy correction to each column of the binding-energy dataframe. The entropy values are given as TS, so the relative entropy values need to be subtracted from the binding energies. The experimental result (the `Exp` column) must not have the entropy subtracted from it.

## Apply entropy term of various types and save to csv 

In [10]:
# Column names of corrected_10 minus 'Exp' (the experimental values).
# This same list is passed along with the 5- and 25-snapshot tables further
# down, so it assumes those tables share corrected_10's columns.
columns_without_exp = corrected_10.columns.tolist()
columns_without_exp.remove('Exp')

In [11]:
def write_entropy_csv(binding_df,entropy_series,columns_without_exp,
                      num_of_entropy_snaps,num_of_snaps,correction,path,exp_name='Exp'):
    """Subtract relative entropies from binding energies and save the result.

    binding_df           -- DataFrame of binding energies, one row per ligand
    entropy_series       -- Series of relative entropy values, aligned with
                            ``binding_df`` on its index
    columns_without_exp  -- columns of ``binding_df`` to adjust
    num_of_entropy_snaps -- snapshot count of the entropy set (file name only)
    num_of_snaps         -- snapshot count of the binding set (file name only)
    correction           -- truthy selects the 'corrected' file name, falsy
                            the 'uncorrected' one
    path                 -- existing directory that receives the CSV file
    exp_name             -- column copied through without adjustment

    Writes ``Entropy<num_of_entropy_snaps>_<corrected|uncorrected>_<num_of_snaps>.csv``
    into ``path`` and returns the adjusted DataFrame, with the ``exp_name``
    column placed last.
    """
    # Row-wise subtraction: each ligand's value is removed from all its columns.
    adjusted = binding_df.loc[:, columns_without_exp].sub(entropy_series, axis=0)
    # Re-attach the untouched experimental column.
    df = pd.concat([adjusted, binding_df[exp_name]], axis=1, sort=True)

    # Truthiness instead of ``== True`` / ``== False``: the old chain left the
    # file name unbound (UnboundLocalError) for any other value such as None.
    label = 'corrected' if correction else 'uncorrected'
    name = 'Entropy{}_{}_{}.csv'.format(num_of_entropy_snaps, label, num_of_snaps)
    df.to_csv(str(pathlib.Path(path) / name))
    return df

In [14]:
# Entropy from the 50-snapshot set applied to the 25-snapshot corrected
# binding energies (EmpDisp present).
ent50_corrected_25 = write_entropy_csv(
    binding_df=corrected_25,
    entropy_series=entropy_50,
    columns_without_exp=columns_without_exp,
    num_of_entropy_snaps=50,
    num_of_snaps=25,
    correction=True,
    path=csv_target_path,
)
ent50_corrected_25

Unnamed: 0,PBE,VV10,B97M-V,5_snaps_thesis,MM,PBE_bj,PBE_bjm,PBE_old,PBE_zero,Exp
catechol,-8.016,-8.286,-8.085,-11.804,-4.057,-6.985,-6.98,-6.945,-7.012,-4.4
fluoroaniline,-4.571,-4.12,-4.328,-5.145,-7.699,-4.593,-4.596,-4.641,-4.579,-5.5
hydroxyaniline,-5.352,-6.203,-6.04,-7.187,-7.103,-2.551,-2.538,-2.393,-2.572,0.0
methylphenol,-7.112,-6.813,-6.255,-8.547,-5.884,-5.251,-5.248,-5.177,-5.251,-4.4


In [15]:
# Entropy from the 25-snapshot set applied to the 25-snapshot corrected
# binding energies (EmpDisp present).
ent25_corrected_25 = write_entropy_csv(
    binding_df=corrected_25,
    entropy_series=entropy_25,
    columns_without_exp=columns_without_exp,
    num_of_entropy_snaps=25,
    num_of_snaps=25,
    correction=True,
    path=csv_target_path,
)
ent25_corrected_25

Unnamed: 0,PBE,VV10,B97M-V,5_snaps_thesis,MM,PBE_bj,PBE_bjm,PBE_old,PBE_zero,Exp
catechol,-8.47,-8.74,-8.54,-12.258,-4.511,-7.439,-7.434,-7.399,-7.466,-4.4
fluoroaniline,-5.079,-4.629,-4.837,-5.653,-8.208,-5.101,-5.105,-5.149,-5.088,-5.5
hydroxyaniline,-6.502,-7.354,-7.19,-8.337,-8.254,-3.702,-3.688,-3.544,-3.723,0.0
methylphenol,-7.466,-7.168,-6.61,-8.902,-6.238,-5.606,-5.603,-5.531,-5.606,-4.4


In [16]:
# Entropy from the 50-snapshot set applied to the 10-snapshot corrected
# binding energies (EmpDisp present).
ent50_corrected_10 = write_entropy_csv(
    binding_df=corrected_10,
    entropy_series=entropy_50,
    columns_without_exp=columns_without_exp,
    num_of_entropy_snaps=50,
    num_of_snaps=10,
    correction=True,
    path=csv_target_path,
)
ent50_corrected_10

Unnamed: 0,PBE,VV10,B97M-V,5_snaps_thesis,MM,PBE_bj,PBE_bjm,PBE_old,PBE_zero,Exp
catechol,-9.282,-9.649,-9.267,-11.804,-3.88,-8.635,-8.632,-8.626,-8.67,-4.4
fluoroaniline,-4.312,-3.587,-4.291,-5.145,-8.877,-3.697,-3.704,-3.76,-3.669,-5.5
hydroxyaniline,-4.338,-4.925,-4.969,-7.187,-5.566,-1.13,-1.119,-0.984,-1.137,0.0
methylphenol,-4.774,-5.168,-4.161,-8.547,-4.971,-2.298,-2.29,-2.175,-2.306,-4.4


In [17]:
# Entropy from the 50-snapshot set applied to the 5-snapshot corrected
# binding energies (EmpDisp present).
ent50_corrected_5 = write_entropy_csv(
    binding_df=corrected_5,
    entropy_series=entropy_50,
    columns_without_exp=columns_without_exp,
    num_of_entropy_snaps=50,
    num_of_snaps=5,
    correction=True,
    path=csv_target_path,
)
ent50_corrected_5

Unnamed: 0,PBE,VV10,B97M-V,5_snaps_thesis,MM,PBE_bj,PBE_bjm,PBE_old,PBE_zero,Exp
catechol,-8.013,-8.778,-8.515,-11.804,-4.69,-6.517,-6.513,-6.48,-6.551,-4.4
fluoroaniline,-4.442,-3.785,-4.292,-5.145,-8.973,-3.971,-3.977,-4.035,-3.944,-5.5
hydroxyaniline,-3.296,-4.767,-5.214,-7.187,-7.153,0.924,0.942,1.145,0.913,0.0
methylphenol,-4.167,-4.971,-4.035,-8.547,-5.441,-0.562,-0.549,-0.387,-0.575,-4.4


In [18]:
# Entropy from the 10-snapshot set applied to the 10-snapshot corrected
# binding energies (EmpDisp present).
ent10_corrected_10 = write_entropy_csv(
    binding_df=corrected_10,
    entropy_series=entropy_10,
    columns_without_exp=columns_without_exp,
    num_of_entropy_snaps=10,
    num_of_snaps=10,
    correction=True,
    path=csv_target_path,
)
ent10_corrected_10

Unnamed: 0,PBE,VV10,B97M-V,5_snaps_thesis,MM,PBE_bj,PBE_bjm,PBE_old,PBE_zero,Exp
catechol,-10.848,-11.215,-10.833,-13.369,-5.445,-10.2,-10.198,-10.191,-10.236,-4.4
fluoroaniline,-7.24,-6.515,-7.22,-8.073,-11.805,-6.625,-6.632,-6.688,-6.598,-5.5
hydroxyaniline,-7.519,-8.107,-8.151,-10.368,-8.747,-4.312,-4.3,-4.166,-4.319,0.0
methylphenol,-6.798,-7.193,-6.186,-10.572,-6.996,-4.322,-4.314,-4.199,-4.33,-4.4


In [19]:
# Entropy from the 5-snapshot set applied to the 5-snapshot corrected
# binding energies (EmpDisp present).
ent5_corrected_5 = write_entropy_csv(
    binding_df=corrected_5,
    entropy_series=entropy_5,
    columns_without_exp=columns_without_exp,
    num_of_entropy_snaps=5,
    num_of_snaps=5,
    correction=True,
    path=csv_target_path,
)
ent5_corrected_5

Unnamed: 0,PBE,VV10,B97M-V,5_snaps_thesis,MM,PBE_bj,PBE_bjm,PBE_old,PBE_zero,Exp
catechol,-9.44,-10.204,-9.941,-13.23,-6.116,-7.943,-7.94,-7.906,-7.977,-4.4
fluoroaniline,-8.45,-7.793,-8.301,-9.153,-12.981,-7.979,-7.985,-8.043,-7.953,-5.5
hydroxyaniline,-6.591,-8.062,-8.509,-10.482,-10.448,-2.371,-2.353,-2.15,-2.383,0.0
methylphenol,-6.802,-7.606,-6.67,-11.182,-8.076,-3.196,-3.184,-3.021,-3.209,-4.4
