In [2]:
import numpy as np
import pandas as pd
import oenotebook as oenb
from openeye.oechem import *
from openmoltools.openeye import *
from openeye.oeiupac import *

#from extraction import main
from test_extract import main

### Part 1: Checking SD Tags of Orion Output Data

In [3]:
#Use output .oeb file and the following lines to create OEMol object with SD Tags
input_file = 'output/testset_solv.oeb'
its = oechem.oemolistream(input_file)
molecule = oechem.OEMol()
oechem.OEReadMolecule(its, molecule)

tag_name = 'name'
#Check if a Certain SD Tag exists and its value
oechem.OEGetSDData(molecule, tag_name)

'dimethylsulfoxide'

In [4]:
def DumpSDData(mol):
    print("SD data of {}:".format(mol.GetTitle()))
    #Loop over SD Data
    for dp in oechem.OEGetSDDataPairs(mol):
        print(dp.GetTag())

DumpSDData(molecule)

SD data of methylsulfinylmethane:
Floe_report_URL_OPLMD
Floe_report_label_OPLMD
Floe_report_lig_svg_OPLMD
Floe_report_OPLMD
Title_OPLMD
Ligand_name_OPLMD
name
solvents
Final_State_Density_Std_OPLMD
Final_State_Density_OPLMD
Start_State_Density_Std_OPLMD
Start_State_Density_OPLMD
FE_Error_OPLMD
FE_OPLMD
IDAC_expt
density
molar_fractions
pressure
temperature
iterations_per_cube
current_iterations
Structure_Parmed_OPLMD
Collection_ID_OPLMD
ConfID_OPLMD
WellID_OPLMD
LigID_OPLMD


In [5]:
#Use imported function to convert .oeb input into a .csv
main('output/testset_solv.oeb', 'output/testset_solv.csv')

In [6]:
selfsolv = pd.read_csv('output/testset_ssolv.csv')
#selfsolv.columns = [x.lstrip() for x in selfsolv.columns]
selfsolv

Unnamed: 0,Molecule,Temperature(K),Pressure(atm),Solvents(smiles),Molar_fractions,FE(kcal/mol),DFE(kcal/mol),Density_Start(g/ml),DDensity_Start(g/ml),Density_Final(g/ml),DDensity_Final(g/ml),IDAC_expt
0,methylsulfinylmethane,337.85,1.0,CS(=O)C,1.0,-6.667071,0.125674,1.049733,0.017803,1.045612,0.013655,0.17
1,"1,2,4-trimethylbenzene",288.15,1.0,Cc1ccc(c(c1)C)C,1.0,-6.858908,0.154936,0.857648,0.006995,0.845297,0.008087,127600.0
2,"1,2,4-trimethylbenzene",298.15,1.0,Cc1ccc(c(c1)C)C,1.0,-6.864855,0.156568,0.842775,0.008959,0.839253,0.008442,115000.0


In [7]:
solv = pd.read_csv('output/testset_solv.csv')
solv

Unnamed: 0,Molecule,Temperature(K),Pressure(atm),Solvents(smiles),Molar_fractions,FE(kcal/mol),DFE(kcal/mol),Density_Start(g/ml),DDensity_Start(g/ml),Density_Final(g/ml),DDensity_Final(g/ml),IDAC_expt
0,methylsulfinylmethane,337.85,1.0,[H]O[H],1.0,-7.462691,0.106989,0.950628,0.013329,0.941492,0.012258,0.17
1,"1,2,4-trimethylbenzene",288.15,1.0,[H]O[H],1.0,-1.834387,0.093468,0.999114,0.01182,0.987008,0.010178,127600.0
2,"1,2,4-trimethylbenzene",298.15,1.0,[H]O[H],1.0,-1.502976,0.095626,0.987265,0.011227,0.980543,0.008545,115000.0


In [8]:
print('methylsulfinylmethane:', np.exp((-6.667 + 7.462)/(0.0019872041 * 337.85)) * (0.9415/1.0456))
print('1,2,4-trimethylbenzene:', np.exp((-1.834 + 6.859)/(0.0019872041 * 288.15)) * (0.987/0.845))
print('1,2,4-trimethylbenzene:', np.exp((-1.503 + 6.865)/(0.0019872041 * 298.15)) * (0.981/0.839))

methylsulfinylmethane: 2.9425067606550988
1,2,4-trimethylbenzene: 7562.03787488704
1,2,4-trimethylbenzene: 9960.48842068008


### Part 2: Calculating IDAC from Orion Output Data

Using the following formula: $$\gamma_{i}^{\infty} = exp\bigg(\frac{\Delta G_{i}^{solv} - \Delta G_{i}^{self-solv}}{k_{B}T} \bigg) \cdot \frac{\rho_{molar}^{solvent}}{\rho_{molar}^{pure-solute}} $$

In [36]:
def calculate_IDAC(solvation, self_solvation):
    table = pd.DataFrame(columns = ['Name', 'Temp', 'IDAC_calc', 'IDAC_expt'])
    table['Name'] = solvation['Molecule']
    table['Temp'] = solvation[' Temperature(K)']
    table['IDAC_calc'] = calc_idac([x for x in table['Temp']],
                                   [x for x in solvation['FE(kcal/mol)']],
                                   [x for x in self_solvation['FE(kcal/mol)']], 
                                   [x for x in solvation['Density_Final(g/ml)']],
                                   [x for x in self_solvation['Density_Final(g/ml)']])
    table['IDAC_expt'] = solvation[' IDAC_expt']
    
    return table

In [37]:
def calc_idac(temperature, G_solvation, G_self_solvation, top_density, bottom_density):
    '''
    top_density: final density in solvation
    bottom_density: final density in self-solvation
    '''
    IDAC_calc = []
    k = 0.0019872041 #units in kcal/mol*K
    for i in range(len(temperature)):
        IDAC_calc.append(np.exp((G_solvation[i] - G_self_solvation[i])/(k*temperature[i])) * (top_density[i]/bottom_density[i]))
    return IDAC_calc

In [38]:
final = calculate_IDAC(solv, selfsolv)

In [39]:
final

Unnamed: 0,Name,Temp,IDAC_calc,IDAC_expt
0,methylsulfinylmethane,337.85,0.275285,0.17
1,"1,2,4-trimethylbenzene",288.15,7553.11206,127600.0
2,"1,2,4-trimethylbenzene",298.15,9950.820347,115000.0


In [36]:
results = oenb.read_file_to_dataframe("output/results_final.oeb.gz")

In [37]:
smile = 'CCCCOC(=O)C'
sub_group = results[results['Solute SMILES'] == smile]

In [39]:
sub_group[sub_group['Solvent name'] == 'water']

Unnamed: 0,Molecule,expt IDAC,"expt IDAC, error",DG_solv (kcal/mol),dDG_solv (kcal/mol),kT log(gamma)_calc (kcal/mol),Solvent name,Temperature (K),dDG_self_solv (kcal/mol),"kT log(gamma)_calc, error (kcal/mol)","kT log(gamma)_expt, error (kcal/mol)",Solvent SMILES,kT log(gamma)_expt (kcal/mol),Solute SMILES,DG_self_solv (kcal/mol)
211,<oechem.OEMol; proxy of <Swig Object of type '...,613.0,3,-3.929219,0.12814,2.913815,water,273.35,0.19023,0.222547,0.004894,[H]O[H],3.48414,CCCCOC(=O)C,-6.93106


In [53]:
calc_idac = np.exp((2.913815)/(0.0019872041*273.35))

In [54]:
col = ['Name', 'Temperature (K)', '$\Delta G_{sol}$ (kcal/mol)', '$\Delta G_{selfsolv}$ (kcal/mol)', 'IDAC_calc', 'IDAC_exp']
compare = pd.DataFrame(columns = col)
compare.append(pd.DataFrame([['Chris', 298.15, -3.293, -6.239, 161.2, 814.00], ['Guilherme', 273.35, -3.929, -6.931, 213.6, 613]], columns = col))

Unnamed: 0,Name,Temperature (K),$\Delta G_{sol}$ (kcal/mol),$\Delta G_{selfsolv}$ (kcal/mol),IDAC_calc,IDAC_exp
0,Chris,298.15,-3.293,-6.239,161.2,814.0
1,Guilherme,273.35,-3.929,-6.931,213.6,613.0
