# Get HBI data

In [1]:
import sys
sys.path.insert(0,"/home/gridsan/hwpang/Software/RMG-Py/")
sys.path.insert(0,"..")

import random
import os
import time
import math
from copy import deepcopy
import pandas as pd
from joblib import Parallel, delayed
from tqdm import tqdm

from rmgpy.data.thermo import ThermoDatabase, ThermoLibrary, ThermoData, remove_thermo_data, add_thermo_data, NASA
from rmgpy.molecule import Molecule
from rmgpy.species import Species
from rmgpy import settings
from rmgpy import constants

from tree.utils import load_thermo_lib_by_path, generate_thermo
from tree.parameters import Ts



# Load data to join

In [2]:
# Read the three input tables (radical thermo data, radical -> resonance-radical
# mapping, closed-shell thermo data) in one pass, in the same order as before.
radical_data_df, resonance_radical_df, closed_shell_data_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/radical_data_no_dup.csv",
        "../data/resonance_radicals.csv",
        "../data/closed_shell_data.csv",
    )
)

In [3]:
# Display the radical -> resonance-radical table read from ../data/resonance_radicals.csv.
resonance_radical_df

Unnamed: 0,radical_smiles,resonance_radical_smiles,has_H,has_C,has_N,has_O,Mw (g/mol),num_rings,num_heavy_atoms,num_rotatable_bonds,radical_atom_type
0,[O]C(=O)OC(O)(O)O,[O]C(=O)OC(O)(O)O,True,True,False,True,122.992963,0,8,4,O
1,[O]C(O)(O)OC(=O)O,[O]C(O)(O)OC(=O)O,True,True,False,True,122.992963,0,8,3,O
2,O=C(O)O[C](O)O,O=C(O)O[C](O)O,True,True,False,True,106.998048,0,7,3,C
3,CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)O[O],True,True,False,True,195.014092,0,13,8,O
4,O=[C]OC(O)(O)O,O=[C]OC(O)(O)O,True,True,False,True,106.998048,0,7,5,C
...,...,...,...,...,...,...,...,...,...,...,...
2800,C1=C[CH]C=1,[C]1=CC=C1,True,True,False,False,51.023475,1,4,0,C
2801,C=C=C1C=[C]C1,C=C=C1C=[C]C1,True,True,False,False,77.039125,1,6,0,C
2802,[CH2]C1=CC#CC1,[CH2]C1=CC#CC1,True,True,False,False,77.039125,1,6,1,C
2803,[CH2]C1=CC#CC1,C=C1[CH]C#CC1,True,True,False,False,77.039125,1,6,0,C


In [4]:
# Display the radical thermo table read from ../data/radical_data_no_dup.csv.
radical_data_df

Unnamed: 0,smiles,H298 (kcal/mol),S298 (cal/mol/K),Sint298 (cal/mol/K),source,level_of_theory,Cp300 (cal/mol/K),Cp400 (cal/mol/K),Cp500 (cal/mol/K),Cp600 (cal/mol/K),Cp800 (cal/mol/K),Cp1000 (cal/mol/K),Cp1500 (cal/mol/K)
0,[O]C(=O)OC(O)(O)O,-223.514126,93.816804,95.999974,dong_pio_liang.py,CBS-QB3,33.926878,38.387673,41.508496,44.062390,47.779730,50.089328,52.510168
1,[O]C(O)(O)OC(=O)O,-223.423390,92.781436,92.781436,dong_pio_liang.py,CBS-QB3,32.531179,37.569659,41.498443,44.710460,49.355989,52.184758,54.821371
2,O=C(O)O[C](O)O,-187.322869,88.139642,89.517068,dong_pio_liang.py,CBS-QB3,28.973242,34.553073,38.746721,41.655671,45.237515,47.423507,49.461069
3,CC(=O)OC(OO)C(=O)C(O)O[O],-185.660577,133.205064,132.633380,dong_pio_liang.py,CBS-QB3,56.569703,66.028226,73.603744,79.772844,88.705511,94.174908,99.503777
4,O=[C]OC(O)(O)O,-180.497563,87.848805,90.031975,dong_pio_liang.py,CBS-QB3,32.378191,36.844343,39.334179,41.301130,44.143726,45.893560,47.744589
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2205,C=C=C1[CH]C1=C,148.970420,78.606070,81.360924,dong_pio_liang.py,CBS-QB3,25.185446,30.771706,34.989720,38.456637,43.926501,47.843872,53.411105
2206,[CH]=C1C=C1,152.166735,66.134536,67.511963,dong_pio_liang.py,CBS-QB3,16.390064,19.702535,22.399354,24.484680,27.637313,29.841300,32.891319
2207,C1=C[CH]C=1,156.106582,64.008341,65.385768,dong_pio_liang.py,CBS-QB3,14.653111,18.488757,21.562144,23.855179,27.322032,29.749165,33.017640
2208,C=C=C1C=[C]C1,157.733085,76.210198,77.587625,dong_pio_liang.py,CBS-QB3,22.675255,28.333884,32.976246,36.811783,42.847715,47.149236,53.155423


# Join data

In [5]:
# Build a copy of the radical table whose columns all carry a "radical_" prefix,
# then discard the prefixed S298 column; the source frame is left untouched.
temp_radical_data_df = (
    radical_data_df
    .rename(columns=lambda name: "radical_" + name)
    .drop(columns="radical_S298 (cal/mol/K)")
)
temp_radical_data_df

Unnamed: 0,radical_smiles,radical_H298 (kcal/mol),radical_Sint298 (cal/mol/K),radical_source,radical_level_of_theory,radical_Cp300 (cal/mol/K),radical_Cp400 (cal/mol/K),radical_Cp500 (cal/mol/K),radical_Cp600 (cal/mol/K),radical_Cp800 (cal/mol/K),radical_Cp1000 (cal/mol/K),radical_Cp1500 (cal/mol/K)
0,[O]C(=O)OC(O)(O)O,-223.514126,95.999974,dong_pio_liang.py,CBS-QB3,33.926878,38.387673,41.508496,44.062390,47.779730,50.089328,52.510168
1,[O]C(O)(O)OC(=O)O,-223.423390,92.781436,dong_pio_liang.py,CBS-QB3,32.531179,37.569659,41.498443,44.710460,49.355989,52.184758,54.821371
2,O=C(O)O[C](O)O,-187.322869,89.517068,dong_pio_liang.py,CBS-QB3,28.973242,34.553073,38.746721,41.655671,45.237515,47.423507,49.461069
3,CC(=O)OC(OO)C(=O)C(O)O[O],-185.660577,132.633380,dong_pio_liang.py,CBS-QB3,56.569703,66.028226,73.603744,79.772844,88.705511,94.174908,99.503777
4,O=[C]OC(O)(O)O,-180.497563,90.031975,dong_pio_liang.py,CBS-QB3,32.378191,36.844343,39.334179,41.301130,44.143726,45.893560,47.744589
...,...,...,...,...,...,...,...,...,...,...,...,...
2205,C=C=C1[CH]C1=C,148.970420,81.360924,dong_pio_liang.py,CBS-QB3,25.185446,30.771706,34.989720,38.456637,43.926501,47.843872,53.411105
2206,[CH]=C1C=C1,152.166735,67.511963,dong_pio_liang.py,CBS-QB3,16.390064,19.702535,22.399354,24.484680,27.637313,29.841300,32.891319
2207,C1=C[CH]C=1,156.106582,65.385768,dong_pio_liang.py,CBS-QB3,14.653111,18.488757,21.562144,23.855179,27.322032,29.749165,33.017640
2208,C=C=C1C=[C]C1,157.733085,77.587625,dong_pio_liang.py,CBS-QB3,22.675255,28.333884,32.976246,36.811783,42.847715,47.149236,53.155423


In [6]:
# Closed-shell table without its "closed_shell_S298 (cal/mol/K)" column
# (returns a new frame; closed_shell_data_df itself is not modified).
temp_closed_shell_data_df = closed_shell_data_df.drop(
    "closed_shell_S298 (cal/mol/K)", axis="columns"
)

In [7]:
# Count how many times each resonance_radical_smiles value occurs in the closed-shell table.
# NOTE(review): presumably a uniqueness check on the key this table is later joined on — confirm.
temp_closed_shell_data_df["resonance_radical_smiles"].value_counts()

[O]C(=O)OC(O)(O)O           1
C=C(C)COOC1C=C(C)[CH]CC1    1
C=C([CH]C)OO                1
[CH2]C(=CC)OO               1
C=C(C)C[C]=O                1
                           ..
[O]CO                       1
CC(CCOO)O[O]                1
[CH2]C(=C)C(C)(CO)OO        1
[CH2]OCOC                   1
C=C1C=C=[C]C1               1
Name: resonance_radical_smiles, Length: 2805, dtype: int64

In [8]:
# Assemble one row per (radical, resonance radical) pair:
#   1. start from the mapping table, keeping the two SMILES columns and the
#      rotatable-bond count (renamed to make clear which structure it describes);
#   2. left-join the radical thermo columns on "radical_smiles";
#   3. left-join the closed-shell thermo columns on "resonance_radical_smiles".
# validate="many_to_one" makes pandas raise MergeError if a join key is duplicated
# in the right-hand table, instead of silently duplicating rows in hbi_df.
hbi_df = (
    resonance_radical_df[["radical_smiles", "resonance_radical_smiles", "num_rotatable_bonds"]]
    .rename(columns={"num_rotatable_bonds": "resonance_radical_num_rotatable_bonds"})
    .merge(
        temp_radical_data_df,
        on="radical_smiles",
        how="left",
        validate="many_to_one",
    )
    .merge(
        temp_closed_shell_data_df,
        on="resonance_radical_smiles",
        how="left",
        validate="many_to_one",
    )
)
hbi_df

Unnamed: 0,radical_smiles,resonance_radical_smiles,resonance_radical_num_rotatable_bonds,radical_H298 (kcal/mol),radical_Sint298 (cal/mol/K),radical_source,radical_level_of_theory,radical_Cp300 (cal/mol/K),radical_Cp400 (cal/mol/K),radical_Cp500 (cal/mol/K),...,closed_shell_Cp300 (cal/mol/K),closed_shell_Cp400 (cal/mol/K),closed_shell_Cp500 (cal/mol/K),closed_shell_Cp600 (cal/mol/K),closed_shell_Cp800 (cal/mol/K),closed_shell_Cp1000 (cal/mol/K),closed_shell_Cp1500 (cal/mol/K),closed_shell_Sint298 (cal/mol/K),closed_shell_thermo_comment,closed_shell_thermo_source
0,[O]C(=O)OC(O)(O)O,[O]C(=O)OC(O)(O)O,4,-223.514126,95.999974,dong_pio_liang.py,CBS-QB3,33.926878,38.387673,41.508496,...,31.562314,36.827897,41.095602,44.234417,48.821415,51.772505,56.215621,100.589618,Thermo group additivity estimation: group(O2s-...,GAV
1,[O]C(O)(O)OC(=O)O,[O]C(O)(O)OC(=O)O,3,-223.423390,92.781436,dong_pio_liang.py,CBS-QB3,32.531179,37.569659,41.498443,...,31.562314,36.827897,41.095602,44.234417,48.821415,51.772505,56.215621,100.589618,Thermo group additivity estimation: group(O2s-...,GAV
2,O=C(O)O[C](O)O,O=C(O)O[C](O)O,3,-187.322869,89.517068,dong_pio_liang.py,CBS-QB3,28.973242,34.553073,38.746721,...,27.214474,31.965583,36.032333,39.309560,44.059044,47.141300,50.458700,94.127419,Thermo group additivity estimation: group(O2s-...,GAV
3,CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)O[O],8,-185.660577,132.633380,dong_pio_liang.py,CBS-QB3,56.569703,66.028226,73.603744,...,52.706616,62.438776,71.442505,78.070363,87.443365,95.103078,102.679981,145.026864,Thermo group additivity estimation: group(O2s-...,GAV
4,O=[C]OC(O)(O)O,O=[C]OC(O)(O)O,5,-180.497563,90.031975,dong_pio_liang.py,CBS-QB3,32.378191,36.844343,39.334179,...,26.935163,30.962696,34.637667,37.592447,42.375430,45.749560,50.979006,94.867820,Thermo group additivity estimation: group(O2s-...,GAV
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2800,C1=C[CH]C=1,[C]1=CC=C1,0,156.106582,65.385768,dong_pio_liang.py,CBS-QB3,14.653111,18.488757,21.562144,...,15.504000,19.920000,23.474000,26.261000,30.329000,33.199000,37.574000,61.950300,Thermo group additivity estimation: group(Cds-...,GAV
2801,C=C=C1C=[C]C1,C=C=C1C=[C]C1,0,157.733085,77.587625,dong_pio_liang.py,CBS-QB3,22.675255,28.333884,32.976246,...,22.416919,29.489266,35.000461,39.185145,45.162263,49.240044,55.323950,51.772385,Thermo group additivity estimation: group(Cs-(...,GAV
2802,[CH2]C1=CC#CC1,[CH2]C1=CC#CC1,1,163.498344,75.963844,dong_pio_liang.py,CBS-QB3,22.606638,28.495118,33.391100,...,23.409000,30.017000,34.619000,39.828000,46.529000,51.356000,56.349000,75.140000,Thermo group additivity estimation: group(Cs-(...,GAV
2803,[CH2]C1=CC#CC1,C=C1[CH]C#CC1,0,163.498344,75.963844,dong_pio_liang.py,CBS-QB3,22.606638,28.495118,33.391100,...,22.819000,29.017000,34.189000,38.448000,45.419000,50.576000,55.959000,75.190000,Thermo group additivity estimation: group(Cs-(...,GAV


# Derive HBI corrections

In [9]:
# Constant added to the enthalpy difference below, in kcal/mol.
# NOTE(review): 52.1 is presumably the 298 K enthalpy of formation of the H atom,
# accounting for the hydrogen removed from the closed-shell molecule — confirm the
# intended value and precision.
H298_OFFSET_KCAL_PER_MOL = 52.1

# Property labels shared by the "radical_", "closed_shell_" and "HBI_" columns:
# enthalpy, intrinsic entropy, then one heat capacity per temperature in Ts.
properties = ["H298 (kcal/mol)", "Sint298 (cal/mol/K)"] + [
    f"Cp{T} (cal/mol/K)" for T in Ts
]

# Enthalpy correction: radical minus closed-shell value, plus the constant offset.
hbi_df["HBI_H298 (kcal/mol)"] = (
    hbi_df["radical_H298 (kcal/mol)"]
    - hbi_df["closed_shell_H298 (kcal/mol)"]
    + H298_OFFSET_KCAL_PER_MOL
)

# Every remaining property (entropy and heat capacities) is a plain difference;
# properties[0] is skipped because the enthalpy was handled above.
for prop in properties[1:]:
    hbi_df[f"HBI_{prop}"] = hbi_df[f"radical_{prop}"] - hbi_df[f"closed_shell_{prop}"]

hbi_df

Unnamed: 0,radical_smiles,resonance_radical_smiles,resonance_radical_num_rotatable_bonds,radical_H298 (kcal/mol),radical_Sint298 (cal/mol/K),radical_source,radical_level_of_theory,radical_Cp300 (cal/mol/K),radical_Cp400 (cal/mol/K),radical_Cp500 (cal/mol/K),...,closed_shell_thermo_source,HBI_H298 (kcal/mol),HBI_Sint298 (cal/mol/K),HBI_Cp300 (cal/mol/K),HBI_Cp400 (cal/mol/K),HBI_Cp500 (cal/mol/K),HBI_Cp600 (cal/mol/K),HBI_Cp800 (cal/mol/K),HBI_Cp1000 (cal/mol/K),HBI_Cp1500 (cal/mol/K)
0,[O]C(=O)OC(O)(O)O,[O]C(=O)OC(O)(O)O,4,-223.514126,95.999974,dong_pio_liang.py,CBS-QB3,33.926878,38.387673,41.508496,...,GAV,101.203465,-4.589644,2.364565,1.559776,0.412894,-0.172027,-1.041685,-1.683176,-3.705453
1,[O]C(O)(O)OC(=O)O,[O]C(O)(O)OC(=O)O,3,-223.423390,92.781436,dong_pio_liang.py,CBS-QB3,32.531179,37.569659,41.498443,...,GAV,101.294200,-7.808181,0.968866,0.741762,0.402841,0.476043,0.534574,0.412253,-1.394250
2,O=C(O)O[C](O)O,O=C(O)O[C](O)O,3,-187.322869,89.517068,dong_pio_liang.py,CBS-QB3,28.973242,34.553073,38.746721,...,GAV,96.140974,-4.610350,1.758768,2.587490,2.714389,2.346110,1.178471,0.282207,-0.997631
3,CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)O[O],8,-185.660577,132.633380,dong_pio_liang.py,CBS-QB3,56.569703,66.028226,73.603744,...,GAV,100.658735,-12.393484,3.863088,3.589450,2.161239,1.702481,1.262146,-0.928170,-3.176204
4,O=[C]OC(O)(O)O,O=[C]OC(O)(O)O,5,-180.497563,90.031975,dong_pio_liang.py,CBS-QB3,32.378191,36.844343,39.334179,...,GAV,88.101481,-4.835845,5.443029,5.881647,4.696512,3.708683,1.768296,0.144000,-3.234417
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2800,C1=C[CH]C=1,[C]1=CC=C1,0,156.106582,65.385768,dong_pio_liang.py,CBS-QB3,14.653111,18.488757,21.562144,...,GAV,103.873082,3.435468,-0.850889,-1.431243,-1.911856,-2.405821,-3.006968,-3.449835,-4.556360
2801,C=C=C1C=[C]C1,C=C=C1C=[C]C1,0,157.733085,77.587625,dong_pio_liang.py,CBS-QB3,22.675255,28.333884,32.976246,...,GAV,118.466211,25.815240,0.258337,-1.155381,-2.024215,-2.373362,-2.314548,-2.090808,-2.168527
2802,[CH2]C1=CC#CC1,[CH2]C1=CC#CC1,1,163.498344,75.963844,dong_pio_liang.py,CBS-QB3,22.606638,28.495118,33.391100,...,GAV,84.371344,0.823844,-0.802362,-1.521882,-1.227900,-2.500041,-3.127836,-3.711522,-2.960733
2803,[CH2]C1=CC#CC1,C=C1[CH]C#CC1,0,163.498344,75.963844,dong_pio_liang.py,CBS-QB3,22.606638,28.495118,33.391100,...,GAV,78.831344,0.773844,-0.212362,-0.521882,-0.797900,-1.120041,-2.017836,-2.931522,-2.570733


In [10]:
# Persist the joined table with the derived HBI columns; the row index is not written.
hbi_df.to_csv(
    path_or_buf="../data/hbi.csv",
    index=False,
)