# Binding energies
Processes the QM calculations to generate binding energy results and saves them as CSV files for easy downstream processing.

In [95]:
# Loads stuff
import sys
sys.path.insert(0,"/home/lg3u19/OnePy")
import onetep_v0_1 as op
import pathlib
import pandas as pd
pd.set_option('display.precision',3) 
import copy

In [96]:
# Directory that receives the summary CSV tables written by the cells below.
csv_target_path = "./ProcessedData/Binding"

In [97]:
# Snapshot labels (kept as strings) for the 5-, 10-, 25- and 50-snapshot subsets.
snapshots_5 = ['24801', '32401', '17201', '9601', '2001']
snapshots_10 = ['24801', '32401', '17201', '13401', '21001',
                '28601', '9601', '5801', '36201', '2001']
# 25 evenly spaced labels: 2001, 3521, ..., 38481 (spacing 1520).
snapshots_25 = [str(label) for label in range(2001, 38481 + 1, 1520)]
# 50 evenly spaced labels: 2001, 2761, ..., 39241 (spacing 760).
snapshots_50 = [str(label) for label in range(2001, 39241 + 1, 760)]

## Functions for QM calcs

In [98]:
def cavity_correction(E_host_non_pol,E_complex_non_polar):
    """Return the cavity correction: 7.116 times (host apolar - complex apolar).

    The result is in the same units as the two inputs.
    """
    # NOTE(review): 7.116 is a fixed scaling factor; its origin is not
    # documented in this notebook - confirm against the method reference.
    apolar_difference = E_host_non_pol - E_complex_non_polar
    return 7.116 * apolar_difference


In [99]:
def binding_free_energy(complex_obj,host_obj,ligand_obj,units='Ha',host_cavity_correction=True,entropy=False):
    """Combine complex, host and ligand energies into binding-energy terms.

    Each of the three objects must expose ``total_energy_vac`` and
    ``total_solvation_energy``. When ``host_cavity_correction`` is enabled,
    ``host_obj`` and ``complex_obj`` must also expose ``total_apolar_energy``.

    Parameters
    ----------
    complex_obj, host_obj, ligand_obj
        Calculation results for the complex, the host and the ligand.
    units : {'Ha', 'kcal/mol'}
        'Ha' returns the values exactly as computed from the attributes
        (presumably Hartree - confirm against the OnePy objects);
        'kcal/mol' passes every returned value through
        ``op.hartree_to_kcal_mol``.
    host_cavity_correction : bool
        When truthy, ``cavity_correction(host apolar, complex apolar)`` is
        added to the solvation difference.
    entropy : False or number
        ``False`` means no entropy term; any other value is subtracted from
        the binding free energy.

    Returns
    -------
    tuple
        ``(delta_E, delta_solvation, delta_solvation_corrected, entropy,
        binding free energy)``. ``entropy`` is handed back as given (it is
        run through the unit conversion for 'kcal/mol').

    Raises
    ------
    ValueError
        If ``units`` is not one of the supported values. Previously this
        case silently returned ``None``.
    """
    if units not in ('Ha', 'kcal/mol'):
        raise ValueError(
            "units must be 'Ha' or 'kcal/mol', got {!r}".format(units))

    # Complex minus the separated host and ligand, in vacuum and for solvation.
    delta_E = (complex_obj.total_energy_vac
               - host_obj.total_energy_vac
               - ligand_obj.total_energy_vac)
    delta_solvation = (complex_obj.total_solvation_energy
                       - host_obj.total_solvation_energy
                       - ligand_obj.total_solvation_energy)

    delta_solvation_corrected = delta_solvation
    if host_cavity_correction:
        delta_solvation_corrected = delta_solvation + cavity_correction(
            host_obj.total_apolar_energy, complex_obj.total_apolar_energy)

    # Named differently from the function so the function is not shadowed.
    delta_G = delta_E + delta_solvation_corrected
    if entropy is not False:
        delta_G = delta_G - entropy

    terms = (delta_E, delta_solvation, delta_solvation_corrected, entropy, delta_G)
    if units == 'Ha':
        return terms
    return tuple(op.hartree_to_kcal_mol(term) for term in terms)

In [100]:
def exclude_function(snapshot,functional):
    """Look up hand-entered energies for one calculation.

    ``snapshot`` is a key such as ``'complex_24801'`` and ``functional`` is
    ``'BLYP'`` or ``'VV10'``. Returns the tuple
    ``(E_vac, E_solv - E_vac, apolar)`` taken from the table below, i.e. the
    vacuum energy, the solvation energy as a difference, and the apolar term.

    If ``functional`` is False a message is printed and False is returned.
    A functional or snapshot missing from the table raises KeyError.
    """
    if functional == False:
        print ('Needs to know functional')
        return False

    blyp = {
        'complex_17201': {'E_vac': -11703.28810914757196,
                          'E_solv': -11707.17781111132354,
                          'apolar': 2.33371847090407+-1.67776855169471},
        'host_17201': {'E_vac': -11642.52098752837628,
                       'E_solv': -11646.40564469317906,
                       'apolar': 2.35343919310646+-1.69194627190406},
        'complex_24801': {'E_vac': -11703.26093899935950,
                          'E_solv': -11707.23062178841792,
                          'apolar': 2.34244656224695+-1.68404339476339},
        'host_24801': {'E_vac': -11642.50372680707369,
                       'E_solv': -11646.47373658255674,
                       'apolar': 2.35905073333339+-1.69598054846171},
        'complex_32401': {'E_vac': -11703.48681972792838,
                          'E_solv': -11707.25277486920822,
                          'apolar': 2.36436511707726+-1.69980119179477},
    }
    vv10 = {
        'complex_24801': {'E_vac': -11755.75196689821314,
                          'E_solv': -11759.71220481162891,
                          'apolar': 2.27888540828097+-1.63834769214840},
        'host_24801': {'E_vac': -11694.67952414450701,
                       'E_solv': -11698.63966000627261,
                       'apolar': 2.29724655713729+-1.65154798108993},
        'complex_32401': {'E_vac': -11755.99253040028270,
                          'E_solv': -11759.76183088717517,
                          'apolar': 2.29984764129358+-1.65341796551699},
        'host_32401': {'E_vac': -11694.93358778918628,
                       'E_solv': -11698.69660710869357,
                       'apolar': 2.32170136252029+-1.66912915204990},
    }
    data = {'BLYP': blyp, 'VV10': vv10}

    entry = data[functional][snapshot]
    return entry['E_vac'], entry['E_solv']-entry['E_vac'], entry['apolar']
    

In [101]:
def get_average_binding_manual(outfile_dir,correction=True,exclude_list=[],functional=False,
                        exclude_function=False,snapshots=False): 
    """Build the per-snapshot binding table, with manual overrides for some files.

    Works like ``get_average_binding`` but every key listed in
    ``exclude_list`` (exact names such as ``'host_24801'``) is not parsed for
    its energies; instead ``exclude_function(key, functional)`` supplies the
    vacuum, solvation and apolar energies, which are written onto the object.

    ``snapshots`` is either False (use every snapshot found in the loaded
    files) or a list of snapshot labels. Returns ``(table, table.mean())``
    where ``table`` has one row per snapshot and the columns
    ``E``, ``G_solv``, ``G_solv_corrected`` and ``G_bind`` (in kcal/mol).
    """
    # One onetep object per output file, keyed by names like 'complex_2001'.
    calcs = op.load_out_files(outfile_dir,format_flag=True,
                              delim='_',split_num=0)

    # No explicit selection: collect the snapshot label (text after the last
    # underscore) of every key, keeping first-seen order without duplicates.
    if snapshots==False:
        snapshots = list(dict.fromkeys(name.split(sep='_')[-1]
                                       for name in calcs.keys()))

    for name in calcs.keys():
        calc = calcs[name]
        calc.get_atom_counts()
        calc.get_input_flags()
        calc.get_dispersion()
        if name not in exclude_list:
            calc.check_is_auto_solvation()
            calc.get_total_time()
            calc.get_energy_conv()
            calc.get_solvation_summary_new()
            continue
        # Excluded file: take the three energies from the pre-filled table.
        vac,solvation,apolar = exclude_function(name,functional)
        calc.total_energy_vac = vac
        calc.total_solvation_energy = solvation
        calc.total_apolar_energy = apolar

    per_snapshot = {
        label: binding_free_energy(calcs['complex_'+label],
                                   calcs['host_'+label],
                                   calcs['ligand_'+label],
                                   units='kcal/mol',host_cavity_correction=correction)
        for label in snapshots
    }
    table = pd.DataFrame.from_dict(per_snapshot,orient='index',
                                   columns=['E','G_solv','G_solv_corrected','S','G_bind'])
    # The entropy column is not used in this analysis.
    table = table.drop(columns=['S'])

    return table, table.mean()

In [102]:
def get_average_binding(outfile_dir,correction=True,snapshots=False): 
    """Build the per-snapshot binding table for one directory of output files.

    Loads every output file in ``outfile_dir`` through ``op.load_out_files``,
    runs the parsing methods on each object and evaluates
    ``binding_free_energy`` (in kcal/mol) for each snapshot.

    ``snapshots`` is either False (use every snapshot found in the loaded
    files) or a list of snapshot labels; ``correction`` toggles the host
    cavity correction. Returns ``(table, table.mean())`` where ``table`` has
    one row per snapshot and the columns ``E``, ``G_solv``,
    ``G_solv_corrected`` and ``G_bind``.
    """
    # One onetep object per output file, keyed by names like 'complex_2001'.
    calcs = op.load_out_files(outfile_dir,format_flag=True,
                              delim='_',split_num=0)
    for name in calcs.keys():
        calc = calcs[name]
        calc.check_is_auto_solvation()
        calc.get_total_time()
        calc.get_atom_counts()
        calc.get_energy_conv()
        calc.get_input_flags()
        calc.get_solvation_summary_new()
        calc.get_dispersion()

    # No explicit selection: collect the snapshot label (text after the last
    # underscore) of every key, keeping first-seen order without duplicates.
    if snapshots==False:
        snapshots = list(dict.fromkeys(name.split(sep='_')[-1]
                                       for name in calcs.keys()))

    per_snapshot = {
        label: binding_free_energy(calcs['complex_'+label],
                                   calcs['host_'+label],
                                   calcs['ligand_'+label],
                                   units='kcal/mol',host_cavity_correction=correction)
        for label in snapshots
    }
    table = pd.DataFrame.from_dict(per_snapshot,orient='index',
                                   columns=['E','G_solv','G_solv_corrected','S','G_bind'])
    # The entropy column is not used in this analysis.
    table = table.drop(columns=['S'])

    return table, table.mean()

In [103]:
def load_data(outfile_dir_root,functionals,correction=False,snapshots=False):
    """Return the mean binding-energy components for each functional.

    For every name in ``functionals`` the sub-directory
    ``outfile_dir_root / name`` is processed with ``get_average_binding``.
    The result is a DataFrame with one column per functional holding the
    averages over the selected snapshots, with or without cavity correction.
    """
    def _mean_for(functional):
        # Only the averaged series is needed here; the per-snapshot table is dropped.
        _, means = get_average_binding(outfile_dir_root / functional,
                                       correction=correction,snapshots=snapshots)
        means.name = functional
        return means

    return pd.concat([_mean_for(functional) for functional in functionals],axis=1)
    

In [104]:
def load_data_all(outfile_dir_root,functionals,correction=False,snapshots=False):
    """Return mean binding-energy components plus the raw per-snapshot tables.

    For every name in ``functionals`` the sub-directory
    ``outfile_dir_root / name`` is processed with ``get_average_binding``.
    Returns ``(means, tables)``: ``means`` is a DataFrame with one column of
    snapshot averages per functional, ``tables`` maps each functional to its
    per-snapshot DataFrame.
    """
    tables = {}
    means_per_functional = []
    for functional in functionals:
        tables[functional], means = get_average_binding(
            outfile_dir_root / functional,correction=correction,snapshots=snapshots)
        means.name = functional
        means_per_functional.append(means)
    return pd.concat(means_per_functional,axis=1), tables
    

In [105]:
def binding_energy_workflow(snapshots_active,correction_active,mm_series,functionals = ['PBE','VV10','B97M-V']):
    """Run the whole relative binding free energy analysis for all ligands.

    Parameters
    ----------
    snapshots_active : list of str
        Snapshot labels to average over.
    correction_active : bool
        Whether the host cavity correction is applied.
    mm_series : pandas.Series
        MM results indexed by ligand name; stored in the ``MM`` column
        (aligned on the ligand names).
    functionals : list of str
        Sub-directory names (one per functional) to process for every
        ligand, methylphenol included.

    Returns
    -------
    pandas.DataFrame
        One row per ligand (catechol, methylphenol, fluoroaniline,
        hydroxyaniline) with one column per functional plus ``Exp``,
        ``5_snaps_thesis`` and ``MM``. Functional columns hold G_bind shifted
        so that phenol matches its reference value of -5.6.

    Notes
    -----
    Output files are read from ``<cwd>/<ligand>_outfiles/<functional>``.
    """
    cwd = pathlib.Path.cwd()

    def _load(ligand):
        # Mean binding-energy components of one ligand, one column per functional.
        return load_data(cwd / (ligand + '_outfiles'), functionals,
                         snapshots=snapshots_active, correction=correction_active)

    phenol = _load('phenol')
    catechol = _load('catechol')
    fluoroaniline = _load('fluoroaniline')
    hydroxyaniline = _load('hydroxyaniline')

    # Methylphenol is a special case: some files were restarted, so their
    # energies come from the pre-filled table in exclude_function.
    # The BLYP list is not needed for the default functionals but is kept
    # for the record.
    manual_exclusions = {
        'BLYP': ['complex_17201','host_17201','complex_24801','host_24801','complex_32401'],
        'VV10': ['complex_24801','host_24801','complex_32401','host_32401'],
    }
    # The caller's `functionals` is used here as well. It used to be
    # overwritten with the default list at this point, which desynchronised
    # methylphenol from the other ligands for any non-default selection.
    outfile_dir_root = cwd / 'methylphenol_outfiles'
    list_of_mean_series = []
    for functional in functionals:
        outfile_dir = outfile_dir_root / functional
        if functional in manual_exclusions:
            summary_df, mean_series = get_average_binding_manual(
                outfile_dir, correction=correction_active,
                exclude_function=exclude_function,
                exclude_list=manual_exclusions[functional],
                functional=functional, snapshots=snapshots_active)
        else:
            summary_df, mean_series = get_average_binding(
                outfile_dir, correction_active, snapshots=snapshots_active)
        mean_series.name = functional
        list_of_mean_series.append(mean_series)
    methylphenol = pd.concat(list_of_mean_series, axis=1)

    # Shift every ligand so that phenol lands on its reference value of -5.6.
    normalization_series = - 5.6 - phenol.loc['G_bind':, :]
    ordered_frames = [catechol, methylphenol, fluoroaniline, hydroxyaniline]
    normalized_rows = [frame.loc['G_bind':, :] + normalization_series
                       for frame in ordered_frames]

    # Stack the rows in a fixed order. An outer merge on the float columns
    # may sort or collapse rows, which would attach the positional labels and
    # the Exp values below to the wrong ligands.
    binding_energy_df = pd.concat(normalized_rows, ignore_index=True)
    binding_energy_df['Exp'] = [-4.4, -4.4, -5.5, 0.0]
    binding_energy_df['5_snaps_thesis'] = [-12.2, -10.1, -6.3, -8.2]
    binding_energy_df.rename(index={0: 'catechol', 1: "methylphenol",
                                    2: 'fluoroaniline', 3: 'hydroxyaniline'},
                             inplace=True)
    # Dispersion could also be added here, as in the original analysis notebook.
    # MM results are aligned on the ligand names set just above.
    binding_energy_df['MM'] = mm_series
    return binding_energy_df

## Function for MM Binding energies

In [106]:
# Create a dictionary holding, for each ligand, a DataFrame with the data of
# every snapshot read from that ligand's .csv file.
mm_dict = {}
for ligand in ['phenol','methylphenol','catechol','fluoroaniline','hydroxyaniline']:
    mm_data = pd.read_csv('MM-PBSA/'+ligand+'/MM-energies-solvation.csv',delimiter=';')
    # The first column becomes the index and is relabelled as the snapshot.
    mm_data = mm_data.set_index(mm_data.columns[0])
    mm_data.index.name = 'snapshot'
    # Keep only the snapshot rows, drop columns containing missing values,
    # and drop the stray 'Unnamed: 13' column of the oddly formatted
    # methylphenol file when it is present.
    mm_data = (mm_data.loc['2001':'39963',:]
               .dropna(axis=1)
               .drop(columns=['Unnamed: 13'],errors='ignore'))
    mm_data.columns = [
        'comp-gas','comp-polar','comp-non-polar','comp-total',
        'host-gas','host-polar','host-non-polar','host-total',
        'lig-gas','lig-polar','lig-non-polar','lig-total',
        'net-gas','net-polar','net-non-polar','net-total',
    ]
    # Everything was read as text; convert to numbers before storing.
    mm_dict[ligand] = mm_data.apply(pd.to_numeric)
# Reset the temporary variable so it cannot be used by accident later on.
mm_data = False
    

In [107]:
def mm_binding_energy(mm_data,snapshots,correction=True,entropy=False):
    """Return the mean MM binding energy over a set of snapshots.

    Parameters
    ----------
    mm_data : pandas.DataFrame
        MM data indexed by snapshot label with at least the column
        ``net-total`` (and ``host-non-polar`` / ``comp-non-polar`` when the
        correction is used). It is not modified.
    snapshots : 'All' or list-like of labels
        The string 'All' averages over every row; anything else is used as a
        row selection via ``mm_data.loc``.
    correction : bool
        Interpreted by truthiness. When truthy, the MM cavity correction
        ``2 * (host-non-polar - comp-non-polar)`` is added per snapshot
        before averaging.
    entropy
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    float
        Mean of the (optionally corrected) ``net-total`` column.
    """
    # Compare against 'All' only for real strings; an array-like selection
    # would otherwise be compared element-wise and have no single truth value.
    use_all = isinstance(snapshots, str) and snapshots == 'All'
    subset_df = mm_data if use_all else mm_data.loc[snapshots, :]

    # Work on derived Series instead of writing scratch columns into the
    # selection, so nothing is assigned onto an indexing result.
    binding = subset_df['net-total']
    if correction:
        binding = binding + 2*(subset_df['host-non-polar'] - subset_df['comp-non-polar'])
    return binding.mean()

In [108]:
def mm_binding_energy_gas(mm_data,snapshots,entropy=False):
    """Return the mean of the ``net-gas`` column over a set of snapshots.

    ``snapshots`` is either 'All' (every row of ``mm_data``) or a list of
    snapshot labels used to select rows. No cavity correction is involved,
    and ``entropy`` is accepted but not used.
    """
    selected = mm_data.loc[:,:] if snapshots=='All' else mm_data.loc[snapshots,:]
    return selected['net-gas'].mean()

In [109]:
def mm_binding_energy_series(mm_dict,snapshots,ligands,correction=True,entropy=False,reference='phenol',ref_exp=-5.6):
    """Return MM binding energies relative to a reference ligand.

    The mean binding energy of every ligand in ``ligands`` is computed with
    ``mm_binding_energy``. All values are then shifted by the same offset,
    chosen so that the reference ligand (phenol by default) would equal
    ``ref_exp`` (-5.6 kcal/mol by default). The returned Series holds the
    shifted values of every ligand except the reference, indexed by ligand.
    """
    # 'Absolute' mean binding energy per ligand.
    absolute = {name: mm_binding_energy(mm_dict[name],snapshots,correction,entropy)
                for name in ligands}
    # Offset that maps the reference ligand onto its reference value.
    offset = ref_exp - absolute[reference]
    relative = {name: offset + value
                for name, value in absolute.items() if name != reference}
    return pd.Series(relative)

In [110]:
# Relative MM binding free energies for every snapshot subset, each with and
# without the cavity correction.
ligands = ['phenol','methylphenol','catechol','fluoroaniline','hydroxyaniline']
mm_5_uncorrected = mm_binding_energy_series(mm_dict,list(map(str,snapshots_5)),ligands,correction=False)
mm_5_corrected = mm_binding_energy_series(mm_dict,list(map(str,snapshots_5)),ligands,correction=True)
mm_10_uncorrected = mm_binding_energy_series(mm_dict,list(map(str,snapshots_10)),ligands,correction=False)
mm_10_corrected = mm_binding_energy_series(mm_dict,list(map(str,snapshots_10)),ligands,correction=True)
mm_25_uncorrected = mm_binding_energy_series(mm_dict,list(map(str,snapshots_25)),ligands,correction=False)
mm_25_corrected = mm_binding_energy_series(mm_dict,list(map(str,snapshots_25)),ligands,correction=True)
mm_50_uncorrected = mm_binding_energy_series(mm_dict,list(map(str,snapshots_50)),ligands,correction=False)
mm_50_corrected = mm_binding_energy_series(mm_dict,list(map(str,snapshots_50)),ligands,correction=True)

## 25 Snaps
### No cavity correction

In [111]:
# 25 snapshots, no cavity correction.
uncorrected_25 = binding_energy_workflow(snapshots_active=snapshots_25,
                                         correction_active=False,
                                         mm_series=mm_25_uncorrected)

In [112]:
# Save the table, then display it as the cell output.
uncorrected_25.to_csv(f'{csv_target_path}/uncorrected_25.csv')
uncorrected_25

Unnamed: 0,PBE,VV10,B97M-V,Exp,5_snaps_thesis,MM
catechol,-10.281,-10.702,-10.404,-4.4,-12.2,-4.462
methylphenol,-9.016,-9.387,-8.79,-4.4,-10.1,-7.611
fluoroaniline,-4.952,-4.926,-4.984,-5.5,-6.3,-8.975
hydroxyaniline,-8.064,-8.934,-8.763,0.0,-8.2,-8.156


### Cavity correction

In [113]:
# 25 snapshots with cavity correction: compute, save, then display.
corrected_25 = binding_energy_workflow(snapshots_active=snapshots_25,
                                       correction_active=True,
                                       mm_series=mm_25_corrected)
corrected_25.to_csv(f'{csv_target_path}/corrected_25.csv')
corrected_25

Unnamed: 0,PBE,VV10,B97M-V,Exp,5_snaps_thesis,MM
catechol,-8.412,-8.683,-8.482,-4.4,-12.2,-4.453
methylphenol,-8.664,-8.366,-7.808,-4.4,-10.1,-7.436
fluoroaniline,-5.726,-5.275,-5.483,-5.5,-6.3,-8.854
hydroxyaniline,-6.365,-7.216,-7.053,0.0,-8.2,-8.116


## 10 Snaps
### No cavity correction

In [87]:
# 10 snapshots, no cavity correction.
uncorrected_10 = binding_energy_workflow(snapshots_active=snapshots_10,
                                         correction_active=False,
                                         mm_series=mm_10_uncorrected)

In [88]:
# Save the table, then display it as the cell output.
uncorrected_10.to_csv(f'{csv_target_path}/uncorrected_10.csv')
uncorrected_10

Unnamed: 0,PBE,VV10,B97M-V,Exp,5_snaps_thesis,MM
catechol,-10.769,-11.285,-10.848,-4.4,-12.2,-4.332
methylphenol,-8.713,-9.628,-8.649,-4.4,-10.1,-6.624
fluoroaniline,-6.692,-6.217,-6.867,-5.5,-6.3,-10.128
hydroxyaniline,-8.04,-8.558,-8.719,0.0,-8.2,-6.619


### Cavity correction

In [89]:
# 10 snapshots with cavity correction.
corrected_10 = binding_energy_workflow(snapshots_active=snapshots_10,
                                       correction_active=True,
                                       mm_series=mm_10_corrected)

In [90]:
# Save the table, then display it as the cell output.
corrected_10.to_csv(f'{csv_target_path}/corrected_10.csv')
corrected_10

Unnamed: 0,PBE,VV10,B97M-V,Exp,5_snaps_thesis,MM
catechol,-9.678,-10.045,-9.664,-4.4,-12.2,-4.276
methylphenol,-6.327,-6.721,-5.714,-4.4,-10.1,-6.524
fluoroaniline,-5.467,-4.742,-5.446,-5.5,-6.3,-10.032
hydroxyaniline,-5.351,-5.939,-5.983,0.0,-8.2,-6.579


## 5 Snapshots
### Uncorrected

In [91]:
# 5 snapshots, no cavity correction.
uncorrected_5 = binding_energy_workflow(snapshots_active=snapshots_5,
                                        correction_active=False,
                                        mm_series=mm_5_uncorrected)

In [92]:
# Save the table, then display it as the cell output.
uncorrected_5.to_csv(f'{csv_target_path}/uncorrected_5.csv')
uncorrected_5

Unnamed: 0,PBE,VV10,B97M-V,Exp,5_snaps_thesis,MM
catechol,-11.517,-12.346,-12.182,-4.4,-12.2,-5.118
methylphenol,-9.704,-10.891,-9.961,-4.4,-10.1,-7.05
fluoroaniline,-6.475,-6.038,-6.484,-5.5,-6.3,-10.236
hydroxyaniline,-8.257,-9.507,-10.032,0.0,-8.2,-8.162


### Corrected

In [93]:
# 5 snapshots with cavity correction.
corrected_5 = binding_energy_workflow(snapshots_active=snapshots_5,
                                      correction_active=True,
                                      mm_series=mm_5_corrected)

In [94]:
# Save the table, then display it as the cell output.
corrected_5.to_csv(f'{csv_target_path}/corrected_5.csv')
corrected_5

Unnamed: 0,PBE,VV10,B97M-V,Exp,5_snaps_thesis,MM
catechol,-8.41,-9.174,-8.911,-4.4,-12.2,-5.086
methylphenol,-5.72,-6.524,-5.588,-4.4,-10.1,-6.994
fluoroaniline,-5.596,-4.939,-5.447,-5.5,-6.3,-10.128
hydroxyaniline,-4.309,-5.78,-6.227,0.0,-8.2,-8.166


## Generate Energy component per snapshot data files for future analysis

In [114]:
def generate_componente_data_files(ligand_name,outfile_path,functionals,
                                   correction, snapshots,target_path):
    """Write per-snapshot binding-energy components of one ligand to CSV.

    For every functional, the per-snapshot table returned by
    ``load_data_all`` is saved in ``target_path`` as
    ``<ligand>_<functional>_<number of snapshots>_corrected.csv`` or
    ``..._uncorrected.csv``, depending on ``correction``.

    This does not work as-is for methylphenol, whose failed calculations
    need the manual handling done elsewhere in this notebook.
    """
    out_dir = pathlib.Path(target_path)
    # Only the raw per-functional tables are needed; the averages are ignored.
    _, tables = load_data_all(outfile_path,functionals,correction=correction,
                              snapshots=snapshots)
    suffix = '_corrected.csv' if correction == True else '_uncorrected.csv'
    for functional in functionals:
        filename = ligand_name+'_'+functional+'_'+str(len(snapshots))+suffix
        tables[functional].to_csv(out_dir/filename)
        

In [125]:
functionals = ['PBE','VV10','B97M-V']
# 10 snapshots, corrected, for every ligand except methylphenol.
for ligand in ('phenol','catechol','fluoroaniline','hydroxyaniline'):
    path_end = f'{ligand}_outfiles'
    generate_componente_data_files(ligand_name=ligand,
                                   outfile_path=pathlib.Path.cwd() / path_end,
                                   functionals=functionals,
                                   correction=True,
                                   snapshots=snapshots_10,
                                   target_path='./ProcessedData/BindingComponents')
    

In [126]:
# 10 snapshots, uncorrected, for every ligand except methylphenol.
for ligand in ('phenol','catechol','fluoroaniline','hydroxyaniline'):
    path_end = f'{ligand}_outfiles'
    generate_componente_data_files(ligand_name=ligand,
                                   outfile_path=pathlib.Path.cwd() / path_end,
                                   functionals=functionals,
                                   correction=False,
                                   snapshots=snapshots_10,
                                   target_path='./ProcessedData/BindingComponents')

In [128]:
# 25 snapshots, uncorrected, for every ligand except methylphenol.
for ligand in ('phenol','catechol','fluoroaniline','hydroxyaniline'):
    path_end = f'{ligand}_outfiles'
    generate_componente_data_files(ligand_name=ligand,
                                   outfile_path=pathlib.Path.cwd() / path_end,
                                   functionals=functionals,
                                   correction=False,
                                   snapshots=snapshots_25,
                                   target_path='./ProcessedData/BindingComponents')
    

In [129]:
# 25 snapshots, corrected, for every ligand except methylphenol.
for ligand in ('phenol','catechol','fluoroaniline','hydroxyaniline'):
    path_end = f'{ligand}_outfiles'
    generate_componente_data_files(ligand_name=ligand,
                                   outfile_path=pathlib.Path.cwd() / path_end,
                                   functionals=functionals,
                                   correction=True,
                                   snapshots=snapshots_25,
                                   target_path='./ProcessedData/BindingComponents')

In [None]:
# methylphenol, 10 snapshots, with cavity correction
correction_active = True
snapshots_active = snapshots_10
target_path = pathlib.Path('./ProcessedData/BindingComponents')
# Methylphenol needs manual handling because some files were restarted.
# The BLYP list is not needed for the functionals below but is kept for the record.
exclude_list_BLYP = ['complex_17201','host_17201','complex_24801','host_24801','complex_32401']
exclude_list_VV10 = ['complex_24801','host_24801','complex_32401','host_32401']
functionals = ['PBE','VV10','B97M-V']
outfile_dir_root = pathlib.Path.cwd() / 'methylphenol_outfiles'
# Functionals whose listed files take their energies from exclude_function.
manual_exclude_lists = {'BLYP': exclude_list_BLYP, 'VV10': exclude_list_VV10}
dict_of_summary_df = {}
for functional in functionals:
    outfile_dir = outfile_dir_root / functional
    if functional in manual_exclude_lists:
        summary_df, mean_series = get_average_binding_manual(
            outfile_dir,correction=correction_active,
            exclude_function=exclude_function,
            exclude_list=manual_exclude_lists[functional],
            functional=functional,snapshots=snapshots_active)
    else:
        summary_df, mean_series = get_average_binding(outfile_dir,correction_active,snapshots=snapshots_active)
    dict_of_summary_df[functional] = summary_df
# Write the files only after every functional has been processed.
suffix = '_corrected.csv' if correction_active == True else '_uncorrected.csv'
for functional in functionals:
    filename = 'methylphenol_'+functional+'_'+str(len(snapshots_active))+suffix
    dict_of_summary_df[functional].to_csv(target_path/filename)
        

In [None]:
# methylphenol, 10 snapshots, without cavity correction
correction_active = False
snapshots_active = snapshots_10
target_path = pathlib.Path('./ProcessedData/BindingComponents')
# Methylphenol needs manual handling because some files were restarted.
# The BLYP list is not needed for the functionals below but is kept for the record.
exclude_list_BLYP = ['complex_17201','host_17201','complex_24801','host_24801','complex_32401']
exclude_list_VV10 = ['complex_24801','host_24801','complex_32401','host_32401']
functionals = ['PBE','VV10','B97M-V']
outfile_dir_root = pathlib.Path.cwd() / 'methylphenol_outfiles'
# Functionals whose listed files take their energies from exclude_function.
manual_exclude_lists = {'BLYP': exclude_list_BLYP, 'VV10': exclude_list_VV10}
dict_of_summary_df = {}
for functional in functionals:
    outfile_dir = outfile_dir_root / functional
    if functional in manual_exclude_lists:
        summary_df, mean_series = get_average_binding_manual(
            outfile_dir,correction=correction_active,
            exclude_function=exclude_function,
            exclude_list=manual_exclude_lists[functional],
            functional=functional,snapshots=snapshots_active)
    else:
        summary_df, mean_series = get_average_binding(outfile_dir,correction_active,snapshots=snapshots_active)
    dict_of_summary_df[functional] = summary_df
# Write the files only after every functional has been processed.
suffix = '_corrected.csv' if correction_active == True else '_uncorrected.csv'
for functional in functionals:
    filename = 'methylphenol_'+functional+'_'+str(len(snapshots_active))+suffix
    dict_of_summary_df[functional].to_csv(target_path/filename)
        

In [None]:
# methylphenol, 25 snapshots, without cavity correction
correction_active = False
snapshots_active = snapshots_25
target_path = pathlib.Path('./ProcessedData/BindingComponents')
# Methylphenol needs manual handling because some files were restarted.
# The BLYP list is not needed for the functionals below but is kept for the record.
exclude_list_BLYP = ['complex_17201','host_17201','complex_24801','host_24801','complex_32401']
exclude_list_VV10 = ['complex_24801','host_24801','complex_32401','host_32401']
functionals = ['PBE','VV10','B97M-V']
outfile_dir_root = pathlib.Path.cwd() / 'methylphenol_outfiles'
# Functionals whose listed files take their energies from exclude_function.
manual_exclude_lists = {'BLYP': exclude_list_BLYP, 'VV10': exclude_list_VV10}
dict_of_summary_df = {}
for functional in functionals:
    outfile_dir = outfile_dir_root / functional
    if functional in manual_exclude_lists:
        summary_df, mean_series = get_average_binding_manual(
            outfile_dir,correction=correction_active,
            exclude_function=exclude_function,
            exclude_list=manual_exclude_lists[functional],
            functional=functional,snapshots=snapshots_active)
    else:
        summary_df, mean_series = get_average_binding(outfile_dir,correction_active,snapshots=snapshots_active)
    dict_of_summary_df[functional] = summary_df
# Write the files only after every functional has been processed.
suffix = '_corrected.csv' if correction_active == True else '_uncorrected.csv'
for functional in functionals:
    filename = 'methylphenol_'+functional+'_'+str(len(snapshots_active))+suffix
    dict_of_summary_df[functional].to_csv(target_path/filename)
        

In [None]:
# methylphenol, 25 snapshots, with cavity correction
correction_active = True
snapshots_active = snapshots_25
target_path = pathlib.Path('./ProcessedData/BindingComponents')
# Methylphenol needs manual handling because some files were restarted.
# The BLYP list is not needed for the functionals below but is kept for the record.
exclude_list_BLYP = ['complex_17201','host_17201','complex_24801','host_24801','complex_32401']
exclude_list_VV10 = ['complex_24801','host_24801','complex_32401','host_32401']
functionals = ['PBE','VV10','B97M-V']
outfile_dir_root = pathlib.Path.cwd() / 'methylphenol_outfiles'
# Functionals whose listed files take their energies from exclude_function.
manual_exclude_lists = {'BLYP': exclude_list_BLYP, 'VV10': exclude_list_VV10}
dict_of_summary_df = {}
for functional in functionals:
    outfile_dir = outfile_dir_root / functional
    if functional in manual_exclude_lists:
        summary_df, mean_series = get_average_binding_manual(
            outfile_dir,correction=correction_active,
            exclude_function=exclude_function,
            exclude_list=manual_exclude_lists[functional],
            functional=functional,snapshots=snapshots_active)
    else:
        summary_df, mean_series = get_average_binding(outfile_dir,correction_active,snapshots=snapshots_active)
    dict_of_summary_df[functional] = summary_df
# Write the files only after every functional has been processed.
suffix = '_corrected.csv' if correction_active == True else '_uncorrected.csv'
for functional in functionals:
    filename = 'methylphenol_'+functional+'_'+str(len(snapshots_active))+suffix
    dict_of_summary_df[functional].to_csv(target_path/filename)
        