# Get HBI data

In [2]:
import sys
sys.path.insert(0,"/home/gridsan/hwpang/Software/RMG-Py/")

import random
import os
import time
import math
from copy import deepcopy
import pandas as pd
from joblib import Parallel, delayed
from tqdm import tqdm

from utils import load_thermo_lib_by_path, generate_thermo

from rmgpy.data.thermo import ThermoDatabase, ThermoLibrary, ThermoData, remove_thermo_data, add_thermo_data, NASA
from rmgpy.molecule import Molecule
from rmgpy.species import Species
from rmgpy import settings
from rmgpy import constants

# Generate resonance structure data

In [66]:
# Load the radical thermochemistry table; the bare name on the last line
# displays it as the cell output.
radical_df = pd.read_csv("../data/radical.csv")
radical_df

Unnamed: 0,smiles,H298 (kcal/mol),S298 (cal/mol/K),Sint298 (cal/mol/K),source,level_of_theory,Cp300 (cal/mol/K),Cp400 (cal/mol/K),Cp500 (cal/mol/K),Cp600 (cal/mol/K),Cp800 (cal/mol/K),Cp1000 (cal/mol/K),Cp1500 (cal/mol/K),num_resonance
0,[O]C(=O)OC(O)(O)O,-223.514126,93.816804,95.999974,dong_pio_liang.py,CBS-QB3,33.926878,38.387673,41.508496,44.062390,47.779730,50.089328,52.510168,1
1,[O]C(O)(O)OC(=O)O,-223.423390,92.781436,92.781436,dong_pio_liang.py,CBS-QB3,32.531179,37.569659,41.498443,44.710460,49.355989,52.184758,54.821371,1
2,O=C(O)O[C](O)O,-187.322869,88.139642,89.517068,dong_pio_liang.py,CBS-QB3,28.973242,34.553073,38.746721,41.655671,45.237515,47.423507,49.461069,1
3,CC(=O)OC(OO)C(=O)C(O)O[O],-185.660577,133.205064,132.633380,dong_pio_liang.py,CBS-QB3,56.569703,66.028226,73.603744,79.772844,88.705511,94.174908,99.503777,1
4,O=[C]OC(O)(O)O,-180.497563,87.848805,90.031975,dong_pio_liang.py,CBS-QB3,32.378191,36.844343,39.334179,41.301130,44.143726,45.893560,47.744589,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2232,C=C=C1[CH]C1=C,148.970420,78.606070,81.360924,dong_pio_liang.py,CBS-QB3,25.185446,30.771706,34.989720,38.456637,43.926501,47.843872,53.411105,3
2233,[CH]=C1C=C1,152.166735,66.134536,67.511963,dong_pio_liang.py,CBS-QB3,16.390064,19.702535,22.399354,24.484680,27.637313,29.841300,32.891319,1
2234,C1=C[CH]C=1,156.106582,64.008341,65.385768,dong_pio_liang.py,CBS-QB3,14.653111,18.488757,21.562144,23.855179,27.322032,29.749165,33.017640,2
2235,C=C=C1C=[C]C1,157.733085,76.210198,77.587625,dong_pio_liang.py,CBS-QB3,22.675255,28.333884,32.976246,36.811783,42.847715,47.149236,53.155423,1


In [67]:
def generate_radical_resonance_smis(smi):
    """Return SMILES strings for the mono-radical resonance structures of ``smi``.

    The SMILES is parsed into an RMG ``Molecule`` and its resonance structures
    are generated.  Only structures whose atoms carry exactly one radical
    electron in total are kept.  Each kept structure has its atom list sorted
    before it is converted back to SMILES.

    Args:
        smi: SMILES string of a radical.

    Returns:
        A list of SMILES strings, one per kept resonance structure.
    """
    parent = Molecule().from_smiles(smi)

    monoradicals = []
    for structure in parent.generate_resonance_structures():
        n_radical_electrons = sum(atom.radical_electrons for atom in structure.atoms)
        if n_radical_electrons == 1:
            monoradicals.append(structure)

    # Sort every kept structure first, then serialise, as two separate passes.
    for structure in monoradicals:
        structure.atoms.sort()
    return [structure.to_smiles() for structure in monoradicals]

generate_radical_resonance_smis("C[CH]C(=O)OCO")

['C[CH]C(=O)OCO', 'CC=C([O])OCO']

In [68]:
# One list of resonance-structure SMILES per row of radical_df, in row order.
resonance_smiss = list(map(generate_radical_resonance_smis, tqdm(radical_df["smiles"])))

100%|██████████| 2237/2237 [00:25<00:00, 86.91it/s] 


In [69]:
# Column-oriented accumulator: one list per output column.
temp_dict = dict(
    radical_smiles=[],
    radical_resonance_smiles=[],
)

In [70]:
# Rebuild the accumulator inside this cell.  Appending to lists created in a
# different cell made the cell non-idempotent: running it a second time
# appended a second copy of every pair.
temp_dict = {
    "radical_smiles": [],
    "radical_resonance_smiles": [],
}
for smi, resonance_smis in zip(radical_df["smiles"], resonance_smiss):
    # One output row per (parent radical, resonance structure) pair: the parent
    # SMILES is repeated once for each of its resonance structures.
    temp_dict["radical_smiles"].extend([smi] * len(resonance_smis))
    temp_dict["radical_resonance_smiles"].extend(resonance_smis)

HBI_correction_df = pd.DataFrame(temp_dict)
HBI_correction_df

Unnamed: 0,radical_smiles,radical_resonance_smiles
0,[O]C(=O)OC(O)(O)O,[O]C(=O)OC(O)(O)O
1,[O]C(O)(O)OC(=O)O,[O]C(O)(O)OC(=O)O
2,O=C(O)O[C](O)O,O=C(O)O[C](O)O
3,CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)O[O]
4,O=[C]OC(O)(O)O,O=[C]OC(O)(O)O
...,...,...
2862,C1=C[CH]C=1,[C]1=CC=C1
2863,C=C=C1C=[C]C1,C=C=C1C=[C]C1
2864,[CH2]C1=CC#CC1,[CH2]C1=CC#CC1
2865,[CH2]C1=CC#CC1,C=C1[CH]C#CC1


In [71]:
# Save the (radical, resonance structure) pairs; index=False keeps the row
# index out of the CSV.
HBI_correction_df.to_csv("../data/hbi.csv", index=False)

# Generate closed shell structures

In [72]:
def generate_closed_shell_smi(smi):
    """Return the SMILES obtained from ``smi`` after saturating its radicals.

    The SMILES is parsed into an RMG ``Molecule``, ``saturate_radicals()`` is
    called on it, the atom list is sorted, and the result is written back out
    as SMILES.  For example, ``CC1OC1OC[O]`` becomes ``CC1OC1OCO``.

    Args:
        smi: SMILES string of a radical.

    Returns:
        SMILES string of the saturated molecule.
    """
    closed_shell = Molecule().from_smiles(smi)
    closed_shell.saturate_radicals()
    closed_shell.atoms.sort()
    closed_shell_smi = closed_shell.to_smiles()
    return closed_shell_smi

generate_closed_shell_smi("CC1OC1OC[O]")

'CC1OC1OCO'

In [73]:
# Reload the (radical, resonance structure) pairs that were written to
# ../data/hbi.csv above.
HBI_correction_df = pd.read_csv("../data/hbi.csv")

In [74]:
# Saturate each resonance structure to get the SMILES of its closed-shell form.
HBI_correction_df["closed_shell_smiles"] = (
    HBI_correction_df["radical_resonance_smiles"].map(generate_closed_shell_smi)
)
HBI_correction_df

Unnamed: 0,radical_smiles,radical_resonance_smiles,closed_shell_smiles
0,[O]C(=O)OC(O)(O)O,[O]C(=O)OC(O)(O)O,O=C(O)OC(O)(O)O
1,[O]C(O)(O)OC(=O)O,[O]C(O)(O)OC(=O)O,O=C(O)OC(O)(O)O
2,O=C(O)O[C](O)O,O=C(O)O[C](O)O,O=C(O)OC(O)O
3,CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)OO
4,O=[C]OC(O)(O)O,O=[C]OC(O)(O)O,O=COC(O)(O)O
...,...,...,...
2862,C1=C[CH]C=1,[C]1=CC=C1,C1=CC=C1
2863,C=C=C1C=[C]C1,C=C=C1C=[C]C1,C=C=C1C=CC1
2864,[CH2]C1=CC#CC1,[CH2]C1=CC#CC1,CC1=CC#CC1
2865,[CH2]C1=CC#CC1,C=C1[CH]C#CC1,C=C1CC#CC1


In [75]:
# Overwrite ../data/hbi.csv with the table that now includes closed_shell_smiles.
HBI_correction_df.to_csv("../data/hbi.csv", index=False)

# Generate closed shell thermo

In [76]:
# Reload the table saved at the end of the previous section (it now carries
# the closed_shell_smiles column).
HBI_correction_df = pd.read_csv("../data/hbi.csv")

In [77]:
# Temperatures at which heat capacities are tabulated; they match the
# Cp300 ... Cp1500 column names used throughout the notebook.
Ts = [300, 400, 500, 600, 800, 1000, 1500]

# Thermo library files.  The list order is the order in which they are placed
# at the front of the database's library_order when the database is set up.
lib_paths = [
    "../data/dong_pio_liang.py",
    "../data/pang.py",
    "../data/johnson_cbs-qb3.py",
    "../data/johnson_g4.py",
]

In [78]:
# load thermo database

# Start from an empty ThermoDatabase and load the group definitions found
# under settings["database.directory"]/thermo/groups.
thermo_database = ThermoDatabase()
thermo_database.load_groups(os.path.join(settings["database.directory"], "thermo", "groups"))
for path in lib_paths:
    load_thermo_lib_by_path(path, thermo_database)
    # presumably the helper adds `path` to library_order (TODO confirm in utils);
    # that entry is removed here so the order can be set explicitly below
    thermo_database.library_order.remove(path)
# Put the project libraries at the front of library_order, in lib_paths order
# (presumably this is the lookup priority -- verify against RMG's ThermoDatabase).
thermo_database.library_order = lib_paths + thermo_database.library_order

The thermodynamics library ../data/dong_pio_liang.py is loaded.
The thermodynamics library ../data/pang.py is loaded.
The thermodynamics library ../data/johnson_cbs-qb3.py is loaded.
The thermodynamics library ../data/johnson_g4.py is loaded.


In [80]:
# takes ~1 min 30 sec
# One generate_thermo result per row of HBI_correction_df, in row order.
closed_shell_thermos = [
    generate_thermo(thermo_database, closed_shell_smi, resonance=False)
    for closed_shell_smi in tqdm(HBI_correction_df["closed_shell_smiles"])
]


100%|██████████| 2867/2867 [01:14<00:00, 38.37it/s]


In [81]:
# Each generate_thermo result is read positionally:
#   [0] H298, [1] S298, [2] Cp values (one per entry of Ts), [3] Sint298, [4] comment.
HBI_correction_df["closed_shell_H298 (kcal/mol)"] = [
    result[0] for result in closed_shell_thermos
]
HBI_correction_df["closed_shell_S298 (cal/mol/K)"] = [
    result[1] for result in closed_shell_thermos
]
for i, T in enumerate(Ts):
    # The i-th Cp value corresponds to the i-th temperature in Ts.
    HBI_correction_df[f"closed_shell_Cp{T} (cal/mol/K)"] = [
        result[2][i] for result in closed_shell_thermos
    ]
HBI_correction_df["closed_shell_Sint298 (cal/mol/K)"] = [
    result[3] for result in closed_shell_thermos
]
HBI_correction_df["closed_shell_thermo_comment"] = [
    result[4] for result in closed_shell_thermos
]
HBI_correction_df

Unnamed: 0,radical_smiles,radical_resonance_smiles,closed_shell_smiles,closed_shell_H298 (kcal/mol),closed_shell_S298 (cal/mol/K),closed_shell_Cp300 (cal/mol/K),closed_shell_Cp400 (cal/mol/K),closed_shell_Cp500 (cal/mol/K),closed_shell_Cp600 (cal/mol/K),closed_shell_Cp800 (cal/mol/K),closed_shell_Cp1000 (cal/mol/K),closed_shell_Cp1500 (cal/mol/K),closed_shell_Sint298 (cal/mol/K),closed_shell_thermo_comment
0,[O]C(=O)OC(O)(O)O,[O]C(=O)OC(O)(O)O,O=C(O)OC(O)(O)O,-272.617591,98.406448,31.562314,36.827897,41.095602,44.234417,48.821415,51.772505,56.215621,100.589618,Thermo group additivity estimation: group(O2s-...
1,[O]C(O)(O)OC(=O)O,[O]C(O)(O)OC(=O)O,O=C(O)OC(O)(O)O,-272.617591,98.406448,31.562314,36.827897,41.095602,44.234417,48.821415,51.772505,56.215621,100.589618,Thermo group additivity estimation: group(O2s-...
2,O=C(O)O[C](O)O,O=C(O)O[C](O)O,O=C(O)OC(O)O,-231.363843,94.127419,27.214474,31.965583,36.032333,39.309560,44.059044,47.141300,50.458700,94.127419,Thermo group additivity estimation: group(O2s-...
3,CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)OO,-234.219312,145.598548,52.706616,62.438776,71.442505,78.070363,87.443365,95.103078,102.679981,145.026864,Thermo group additivity estimation: group(O2s-...
4,O=[C]OC(O)(O)O,O=[C]OC(O)(O)O,O=COC(O)(O)O,-216.499044,92.684651,26.935163,30.962696,34.637667,37.592447,42.375430,45.749560,50.979006,94.867820,Thermo group additivity estimation: group(O2s-...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2862,C1=C[CH]C=1,[C]1=CC=C1,C1=CC=C1,104.333500,57.818020,15.504000,19.920000,23.474000,26.261000,30.329000,33.199000,37.574000,61.950300,Thermo group additivity estimation: group(Cds-...
2863,C=C=C1C=[C]C1,C=C=C1C=[C]C1,C=C=C1C=CC1,91.366874,50.394958,22.416919,29.489266,35.000461,39.185145,45.162263,49.240044,55.323950,51.772385,Thermo group additivity estimation: group(Cs-(...
2864,[CH2]C1=CC#CC1,[CH2]C1=CC#CC1,CC1=CC#CC1,131.227000,72.956831,23.409000,30.017000,34.619000,39.828000,46.529000,51.356000,56.349000,75.140000,Thermo group additivity estimation: group(Cs-(...
2865,[CH2]C1=CC#CC1,C=C1[CH]C#CC1,C=C1CC#CC1,136.767000,72.435147,22.819000,29.017000,34.189000,38.448000,45.419000,50.576000,55.959000,75.190000,Thermo group additivity estimation: group(Cs-(...


In [82]:
def thermo_source(comment):
    """Collapse a thermo comment into a short source label.

    Args:
        comment: Thermo comment string.

    Returns:
        ``"GAV"`` when the comment contains the text
        ``"Thermo group additivity estimation"``; otherwise the comment itself,
        unchanged.
    """
    is_group_additivity = "Thermo group additivity estimation" in comment
    return "GAV" if is_group_additivity else comment
# Derive a short source label ("GAV" or the library comment) for every row.
HBI_correction_df["closed_shell_thermo_source"] = (
    HBI_correction_df["closed_shell_thermo_comment"].map(thermo_source)
)
HBI_correction_df

Unnamed: 0,radical_smiles,radical_resonance_smiles,closed_shell_smiles,closed_shell_H298 (kcal/mol),closed_shell_S298 (cal/mol/K),closed_shell_Cp300 (cal/mol/K),closed_shell_Cp400 (cal/mol/K),closed_shell_Cp500 (cal/mol/K),closed_shell_Cp600 (cal/mol/K),closed_shell_Cp800 (cal/mol/K),closed_shell_Cp1000 (cal/mol/K),closed_shell_Cp1500 (cal/mol/K),closed_shell_Sint298 (cal/mol/K),closed_shell_thermo_comment,closed_shell_thermo_source
0,[O]C(=O)OC(O)(O)O,[O]C(=O)OC(O)(O)O,O=C(O)OC(O)(O)O,-272.617591,98.406448,31.562314,36.827897,41.095602,44.234417,48.821415,51.772505,56.215621,100.589618,Thermo group additivity estimation: group(O2s-...,GAV
1,[O]C(O)(O)OC(=O)O,[O]C(O)(O)OC(=O)O,O=C(O)OC(O)(O)O,-272.617591,98.406448,31.562314,36.827897,41.095602,44.234417,48.821415,51.772505,56.215621,100.589618,Thermo group additivity estimation: group(O2s-...,GAV
2,O=C(O)O[C](O)O,O=C(O)O[C](O)O,O=C(O)OC(O)O,-231.363843,94.127419,27.214474,31.965583,36.032333,39.309560,44.059044,47.141300,50.458700,94.127419,Thermo group additivity estimation: group(O2s-...,GAV
3,CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)OO,-234.219312,145.598548,52.706616,62.438776,71.442505,78.070363,87.443365,95.103078,102.679981,145.026864,Thermo group additivity estimation: group(O2s-...,GAV
4,O=[C]OC(O)(O)O,O=[C]OC(O)(O)O,O=COC(O)(O)O,-216.499044,92.684651,26.935163,30.962696,34.637667,37.592447,42.375430,45.749560,50.979006,94.867820,Thermo group additivity estimation: group(O2s-...,GAV
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2862,C1=C[CH]C=1,[C]1=CC=C1,C1=CC=C1,104.333500,57.818020,15.504000,19.920000,23.474000,26.261000,30.329000,33.199000,37.574000,61.950300,Thermo group additivity estimation: group(Cds-...,GAV
2863,C=C=C1C=[C]C1,C=C=C1C=[C]C1,C=C=C1C=CC1,91.366874,50.394958,22.416919,29.489266,35.000461,39.185145,45.162263,49.240044,55.323950,51.772385,Thermo group additivity estimation: group(Cs-(...,GAV
2864,[CH2]C1=CC#CC1,[CH2]C1=CC#CC1,CC1=CC#CC1,131.227000,72.956831,23.409000,30.017000,34.619000,39.828000,46.529000,51.356000,56.349000,75.140000,Thermo group additivity estimation: group(Cs-(...,GAV
2865,[CH2]C1=CC#CC1,C=C1[CH]C#CC1,C=C1CC#CC1,136.767000,72.435147,22.819000,29.017000,34.189000,38.448000,45.419000,50.576000,55.959000,75.190000,Thermo group additivity estimation: group(Cs-(...,GAV


In [83]:
# Tally how many rows take their closed-shell thermo from each source
# ("GAV" or one of the thermo libraries).
HBI_correction_df["closed_shell_thermo_source"].value_counts()

GAV                                           1864
Thermo library: ../data/dong_pio_liang.py      876
Thermo library: ../data/pang.py                 99
Thermo library: ../data/johnson_g4.py           22
Thermo library: ../data/johnson_cbs-qb3.py       6
Name: closed_shell_thermo_source, dtype: int64

In [84]:
# Overwrite ../data/hbi.csv with the table that now includes the closed_shell_*
# thermo columns.
HBI_correction_df.to_csv("../data/hbi.csv", index=False)

# Derive HBI corrections

In [85]:
# Reload the radical thermochemistry table (the same file read at the top of
# the notebook) and display it.
radical_df = pd.read_csv("../data/radical.csv")
radical_df

Unnamed: 0,smiles,H298 (kcal/mol),S298 (cal/mol/K),Sint298 (cal/mol/K),source,level_of_theory,Cp300 (cal/mol/K),Cp400 (cal/mol/K),Cp500 (cal/mol/K),Cp600 (cal/mol/K),Cp800 (cal/mol/K),Cp1000 (cal/mol/K),Cp1500 (cal/mol/K),num_resonance
0,[O]C(=O)OC(O)(O)O,-223.514126,93.816804,95.999974,dong_pio_liang.py,CBS-QB3,33.926878,38.387673,41.508496,44.062390,47.779730,50.089328,52.510168,1
1,[O]C(O)(O)OC(=O)O,-223.423390,92.781436,92.781436,dong_pio_liang.py,CBS-QB3,32.531179,37.569659,41.498443,44.710460,49.355989,52.184758,54.821371,1
2,O=C(O)O[C](O)O,-187.322869,88.139642,89.517068,dong_pio_liang.py,CBS-QB3,28.973242,34.553073,38.746721,41.655671,45.237515,47.423507,49.461069,1
3,CC(=O)OC(OO)C(=O)C(O)O[O],-185.660577,133.205064,132.633380,dong_pio_liang.py,CBS-QB3,56.569703,66.028226,73.603744,79.772844,88.705511,94.174908,99.503777,1
4,O=[C]OC(O)(O)O,-180.497563,87.848805,90.031975,dong_pio_liang.py,CBS-QB3,32.378191,36.844343,39.334179,41.301130,44.143726,45.893560,47.744589,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2232,C=C=C1[CH]C1=C,148.970420,78.606070,81.360924,dong_pio_liang.py,CBS-QB3,25.185446,30.771706,34.989720,38.456637,43.926501,47.843872,53.411105,3
2233,[CH]=C1C=C1,152.166735,66.134536,67.511963,dong_pio_liang.py,CBS-QB3,16.390064,19.702535,22.399354,24.484680,27.637313,29.841300,32.891319,1
2234,C1=C[CH]C=1,156.106582,64.008341,65.385768,dong_pio_liang.py,CBS-QB3,14.653111,18.488757,21.562144,23.855179,27.322032,29.749165,33.017640,2
2235,C=C=C1C=[C]C1,157.733085,76.210198,77.587625,dong_pio_liang.py,CBS-QB3,22.675255,28.333884,32.976246,36.811783,42.847715,47.149236,53.155423,1


In [86]:
# Reload the table saved at the end of the closed-shell thermo section and
# display it.
HBI_correction_df = pd.read_csv("../data/hbi.csv")
HBI_correction_df

Unnamed: 0,radical_smiles,radical_resonance_smiles,closed_shell_smiles,closed_shell_H298 (kcal/mol),closed_shell_S298 (cal/mol/K),closed_shell_Cp300 (cal/mol/K),closed_shell_Cp400 (cal/mol/K),closed_shell_Cp500 (cal/mol/K),closed_shell_Cp600 (cal/mol/K),closed_shell_Cp800 (cal/mol/K),closed_shell_Cp1000 (cal/mol/K),closed_shell_Cp1500 (cal/mol/K),closed_shell_Sint298 (cal/mol/K),closed_shell_thermo_comment,closed_shell_thermo_source
0,[O]C(=O)OC(O)(O)O,[O]C(=O)OC(O)(O)O,O=C(O)OC(O)(O)O,-272.617591,98.406448,31.562314,36.827897,41.095602,44.234417,48.821415,51.772505,56.215621,100.589618,Thermo group additivity estimation: group(O2s-...,GAV
1,[O]C(O)(O)OC(=O)O,[O]C(O)(O)OC(=O)O,O=C(O)OC(O)(O)O,-272.617591,98.406448,31.562314,36.827897,41.095602,44.234417,48.821415,51.772505,56.215621,100.589618,Thermo group additivity estimation: group(O2s-...,GAV
2,O=C(O)O[C](O)O,O=C(O)O[C](O)O,O=C(O)OC(O)O,-231.363843,94.127419,27.214474,31.965583,36.032333,39.309560,44.059044,47.141300,50.458700,94.127419,Thermo group additivity estimation: group(O2s-...,GAV
3,CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)OO,-234.219312,145.598548,52.706616,62.438776,71.442505,78.070363,87.443365,95.103078,102.679981,145.026864,Thermo group additivity estimation: group(O2s-...,GAV
4,O=[C]OC(O)(O)O,O=[C]OC(O)(O)O,O=COC(O)(O)O,-216.499044,92.684651,26.935163,30.962696,34.637667,37.592447,42.375430,45.749560,50.979006,94.867820,Thermo group additivity estimation: group(O2s-...,GAV
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2862,C1=C[CH]C=1,[C]1=CC=C1,C1=CC=C1,104.333500,57.818020,15.504000,19.920000,23.474000,26.261000,30.329000,33.199000,37.574000,61.950300,Thermo group additivity estimation: group(Cds-...,GAV
2863,C=C=C1C=[C]C1,C=C=C1C=[C]C1,C=C=C1C=CC1,91.366874,50.394958,22.416919,29.489266,35.000461,39.185145,45.162263,49.240044,55.323950,51.772385,Thermo group additivity estimation: group(Cs-(...,GAV
2864,[CH2]C1=CC#CC1,[CH2]C1=CC#CC1,CC1=CC#CC1,131.227000,72.956831,23.409000,30.017000,34.619000,39.828000,46.529000,51.356000,56.349000,75.140000,Thermo group additivity estimation: group(Cs-(...,GAV
2865,[CH2]C1=CC#CC1,C=C1[CH]C#CC1,C=C1CC#CC1,136.767000,72.435147,22.819000,29.017000,34.189000,38.448000,45.419000,50.576000,55.959000,75.190000,Thermo group additivity estimation: group(Cs-(...,GAV


In [87]:
# Copy of radical_df with every column name prefixed by "radical_", so that
# "smiles" becomes "radical_smiles" (the column HBI_correction_df is merged on)
# and the property columns are distinguishable from the closed_shell_* ones.
temp_df = radical_df.add_prefix("radical_")

temp_df

Unnamed: 0,radical_smiles,radical_H298 (kcal/mol),radical_S298 (cal/mol/K),radical_Sint298 (cal/mol/K),radical_source,radical_level_of_theory,radical_Cp300 (cal/mol/K),radical_Cp400 (cal/mol/K),radical_Cp500 (cal/mol/K),radical_Cp600 (cal/mol/K),radical_Cp800 (cal/mol/K),radical_Cp1000 (cal/mol/K),radical_Cp1500 (cal/mol/K),radical_num_resonance
0,[O]C(=O)OC(O)(O)O,-223.514126,93.816804,95.999974,dong_pio_liang.py,CBS-QB3,33.926878,38.387673,41.508496,44.062390,47.779730,50.089328,52.510168,1
1,[O]C(O)(O)OC(=O)O,-223.423390,92.781436,92.781436,dong_pio_liang.py,CBS-QB3,32.531179,37.569659,41.498443,44.710460,49.355989,52.184758,54.821371,1
2,O=C(O)O[C](O)O,-187.322869,88.139642,89.517068,dong_pio_liang.py,CBS-QB3,28.973242,34.553073,38.746721,41.655671,45.237515,47.423507,49.461069,1
3,CC(=O)OC(OO)C(=O)C(O)O[O],-185.660577,133.205064,132.633380,dong_pio_liang.py,CBS-QB3,56.569703,66.028226,73.603744,79.772844,88.705511,94.174908,99.503777,1
4,O=[C]OC(O)(O)O,-180.497563,87.848805,90.031975,dong_pio_liang.py,CBS-QB3,32.378191,36.844343,39.334179,41.301130,44.143726,45.893560,47.744589,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2232,C=C=C1[CH]C1=C,148.970420,78.606070,81.360924,dong_pio_liang.py,CBS-QB3,25.185446,30.771706,34.989720,38.456637,43.926501,47.843872,53.411105,3
2233,[CH]=C1C=C1,152.166735,66.134536,67.511963,dong_pio_liang.py,CBS-QB3,16.390064,19.702535,22.399354,24.484680,27.637313,29.841300,32.891319,1
2234,C1=C[CH]C=1,156.106582,64.008341,65.385768,dong_pio_liang.py,CBS-QB3,14.653111,18.488757,21.562144,23.855179,27.322032,29.749165,33.017640,2
2235,C=C=C1C=[C]C1,157.733085,76.210198,77.587625,dong_pio_liang.py,CBS-QB3,22.675255,28.333884,32.976246,36.811783,42.847715,47.149236,53.155423,1


In [88]:
# Columns this merge contributes: everything in temp_df except the join key.
radical_columns = [column for column in temp_df.columns if column != "radical_smiles"]

HBI_correction_df = (
    HBI_correction_df
    # ../data/hbi.csv is later overwritten with these columns already attached.
    # If the table was reloaded from that file, drop the stale copies first;
    # otherwise the merge would produce *_x / *_y duplicates and the
    # radical_* lookups in the next cell would fail.
    .drop(columns=radical_columns, errors="ignore")
    # Left join keeps one output row per resonance structure.
    # validate="many_to_one" raises pandas.errors.MergeError instead of silently
    # multiplying rows if a radical SMILES occurs more than once in temp_df.
    .merge(temp_df, how="left", on="radical_smiles", validate="many_to_one")
)
HBI_correction_df

Unnamed: 0,radical_smiles,radical_resonance_smiles,closed_shell_smiles,closed_shell_H298 (kcal/mol),closed_shell_S298 (cal/mol/K),closed_shell_Cp300 (cal/mol/K),closed_shell_Cp400 (cal/mol/K),closed_shell_Cp500 (cal/mol/K),closed_shell_Cp600 (cal/mol/K),closed_shell_Cp800 (cal/mol/K),...,radical_source,radical_level_of_theory,radical_Cp300 (cal/mol/K),radical_Cp400 (cal/mol/K),radical_Cp500 (cal/mol/K),radical_Cp600 (cal/mol/K),radical_Cp800 (cal/mol/K),radical_Cp1000 (cal/mol/K),radical_Cp1500 (cal/mol/K),radical_num_resonance
0,[O]C(=O)OC(O)(O)O,[O]C(=O)OC(O)(O)O,O=C(O)OC(O)(O)O,-272.617591,98.406448,31.562314,36.827897,41.095602,44.234417,48.821415,...,dong_pio_liang.py,CBS-QB3,33.926878,38.387673,41.508496,44.062390,47.779730,50.089328,52.510168,1
1,[O]C(O)(O)OC(=O)O,[O]C(O)(O)OC(=O)O,O=C(O)OC(O)(O)O,-272.617591,98.406448,31.562314,36.827897,41.095602,44.234417,48.821415,...,dong_pio_liang.py,CBS-QB3,32.531179,37.569659,41.498443,44.710460,49.355989,52.184758,54.821371,1
2,O=C(O)O[C](O)O,O=C(O)O[C](O)O,O=C(O)OC(O)O,-231.363843,94.127419,27.214474,31.965583,36.032333,39.309560,44.059044,...,dong_pio_liang.py,CBS-QB3,28.973242,34.553073,38.746721,41.655671,45.237515,47.423507,49.461069,1
3,CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)OO,-234.219312,145.598548,52.706616,62.438776,71.442505,78.070363,87.443365,...,dong_pio_liang.py,CBS-QB3,56.569703,66.028226,73.603744,79.772844,88.705511,94.174908,99.503777,1
4,O=[C]OC(O)(O)O,O=[C]OC(O)(O)O,O=COC(O)(O)O,-216.499044,92.684651,26.935163,30.962696,34.637667,37.592447,42.375430,...,dong_pio_liang.py,CBS-QB3,32.378191,36.844343,39.334179,41.301130,44.143726,45.893560,47.744589,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2862,C1=C[CH]C=1,[C]1=CC=C1,C1=CC=C1,104.333500,57.818020,15.504000,19.920000,23.474000,26.261000,30.329000,...,dong_pio_liang.py,CBS-QB3,14.653111,18.488757,21.562144,23.855179,27.322032,29.749165,33.017640,2
2863,C=C=C1C=[C]C1,C=C=C1C=[C]C1,C=C=C1C=CC1,91.366874,50.394958,22.416919,29.489266,35.000461,39.185145,45.162263,...,dong_pio_liang.py,CBS-QB3,22.675255,28.333884,32.976246,36.811783,42.847715,47.149236,53.155423,1
2864,[CH2]C1=CC#CC1,[CH2]C1=CC#CC1,CC1=CC#CC1,131.227000,72.956831,23.409000,30.017000,34.619000,39.828000,46.529000,...,dong_pio_liang.py,CBS-QB3,22.606638,28.495118,33.391100,37.327959,43.401164,47.644478,53.388267,3
2865,[CH2]C1=CC#CC1,C=C1[CH]C#CC1,C=C1CC#CC1,136.767000,72.435147,22.819000,29.017000,34.189000,38.448000,45.419000,...,dong_pio_liang.py,CBS-QB3,22.606638,28.495118,33.391100,37.327959,43.401164,47.644478,53.388267,3


In [89]:
# Properties for which an HBI correction is derived: H298, Sint298 and the heat
# capacity at every temperature in Ts.
properties = ["H298 (kcal/mol)", "Sint298 (cal/mol/K)"] + [
    f"Cp{T} (cal/mol/K)" for T in Ts
]

# HBI correction = radical value - closed-shell value, column by column.
for prop in properties:
    radical_values = HBI_correction_df[f"radical_{prop}"]
    closed_shell_values = HBI_correction_df[f"closed_shell_{prop}"]
    HBI_correction_df[f"HBI_{prop}"] = radical_values - closed_shell_values

HBI_correction_df

Unnamed: 0,radical_smiles,radical_resonance_smiles,closed_shell_smiles,closed_shell_H298 (kcal/mol),closed_shell_S298 (cal/mol/K),closed_shell_Cp300 (cal/mol/K),closed_shell_Cp400 (cal/mol/K),closed_shell_Cp500 (cal/mol/K),closed_shell_Cp600 (cal/mol/K),closed_shell_Cp800 (cal/mol/K),...,radical_num_resonance,HBI_H298 (kcal/mol),HBI_Sint298 (cal/mol/K),HBI_Cp300 (cal/mol/K),HBI_Cp400 (cal/mol/K),HBI_Cp500 (cal/mol/K),HBI_Cp600 (cal/mol/K),HBI_Cp800 (cal/mol/K),HBI_Cp1000 (cal/mol/K),HBI_Cp1500 (cal/mol/K)
0,[O]C(=O)OC(O)(O)O,[O]C(=O)OC(O)(O)O,O=C(O)OC(O)(O)O,-272.617591,98.406448,31.562314,36.827897,41.095602,44.234417,48.821415,...,1,49.103465,-4.589644,2.364565,1.559776,0.412894,-0.172027,-1.041685,-1.683176,-3.705453
1,[O]C(O)(O)OC(=O)O,[O]C(O)(O)OC(=O)O,O=C(O)OC(O)(O)O,-272.617591,98.406448,31.562314,36.827897,41.095602,44.234417,48.821415,...,1,49.194200,-7.808181,0.968866,0.741762,0.402841,0.476043,0.534574,0.412253,-1.394250
2,O=C(O)O[C](O)O,O=C(O)O[C](O)O,O=C(O)OC(O)O,-231.363843,94.127419,27.214474,31.965583,36.032333,39.309560,44.059044,...,1,44.040974,-4.610350,1.758768,2.587490,2.714389,2.346110,1.178471,0.282207,-0.997631
3,CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)OO,-234.219312,145.598548,52.706616,62.438776,71.442505,78.070363,87.443365,...,1,48.558735,-12.393484,3.863088,3.589450,2.161239,1.702481,1.262146,-0.928170,-3.176204
4,O=[C]OC(O)(O)O,O=[C]OC(O)(O)O,O=COC(O)(O)O,-216.499044,92.684651,26.935163,30.962696,34.637667,37.592447,42.375430,...,1,36.001481,-4.835845,5.443029,5.881647,4.696512,3.708683,1.768296,0.144000,-3.234417
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2862,C1=C[CH]C=1,[C]1=CC=C1,C1=CC=C1,104.333500,57.818020,15.504000,19.920000,23.474000,26.261000,30.329000,...,2,51.773082,3.435468,-0.850889,-1.431243,-1.911856,-2.405821,-3.006968,-3.449835,-4.556360
2863,C=C=C1C=[C]C1,C=C=C1C=[C]C1,C=C=C1C=CC1,91.366874,50.394958,22.416919,29.489266,35.000461,39.185145,45.162263,...,1,66.366211,25.815240,0.258337,-1.155381,-2.024215,-2.373362,-2.314548,-2.090808,-2.168527
2864,[CH2]C1=CC#CC1,[CH2]C1=CC#CC1,CC1=CC#CC1,131.227000,72.956831,23.409000,30.017000,34.619000,39.828000,46.529000,...,3,32.271344,0.823844,-0.802362,-1.521882,-1.227900,-2.500041,-3.127836,-3.711522,-2.960733
2865,[CH2]C1=CC#CC1,C=C1[CH]C#CC1,C=C1CC#CC1,136.767000,72.435147,22.819000,29.017000,34.189000,38.448000,45.419000,...,3,26.731344,0.773844,-0.212362,-0.521882,-0.797900,-1.120041,-2.017836,-2.931522,-2.570733


In [90]:
# Overwrite ../data/hbi.csv with the table that now includes the radical_* and
# HBI_* columns.
HBI_correction_df.to_csv("../data/hbi.csv", index=False)

# Stats

In [91]:
# Reload the table saved by the previous section (it includes the HBI_*
# columns) and display it.
HBI_correction_df = pd.read_csv("../data/hbi.csv")
HBI_correction_df

Unnamed: 0,radical_smiles,radical_resonance_smiles,closed_shell_smiles,closed_shell_H298 (kcal/mol),closed_shell_S298 (cal/mol/K),closed_shell_Cp300 (cal/mol/K),closed_shell_Cp400 (cal/mol/K),closed_shell_Cp500 (cal/mol/K),closed_shell_Cp600 (cal/mol/K),closed_shell_Cp800 (cal/mol/K),...,radical_num_resonance,HBI_H298 (kcal/mol),HBI_Sint298 (cal/mol/K),HBI_Cp300 (cal/mol/K),HBI_Cp400 (cal/mol/K),HBI_Cp500 (cal/mol/K),HBI_Cp600 (cal/mol/K),HBI_Cp800 (cal/mol/K),HBI_Cp1000 (cal/mol/K),HBI_Cp1500 (cal/mol/K)
0,[O]C(=O)OC(O)(O)O,[O]C(=O)OC(O)(O)O,O=C(O)OC(O)(O)O,-272.617591,98.406448,31.562314,36.827897,41.095602,44.234417,48.821415,...,1,49.103465,-4.589644,2.364565,1.559776,0.412894,-0.172027,-1.041685,-1.683176,-3.705453
1,[O]C(O)(O)OC(=O)O,[O]C(O)(O)OC(=O)O,O=C(O)OC(O)(O)O,-272.617591,98.406448,31.562314,36.827897,41.095602,44.234417,48.821415,...,1,49.194200,-7.808181,0.968866,0.741762,0.402841,0.476043,0.534574,0.412253,-1.394250
2,O=C(O)O[C](O)O,O=C(O)O[C](O)O,O=C(O)OC(O)O,-231.363843,94.127419,27.214474,31.965583,36.032333,39.309560,44.059044,...,1,44.040974,-4.610350,1.758768,2.587490,2.714389,2.346110,1.178471,0.282207,-0.997631
3,CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)OO,-234.219312,145.598548,52.706616,62.438776,71.442505,78.070363,87.443365,...,1,48.558735,-12.393484,3.863088,3.589450,2.161239,1.702481,1.262146,-0.928170,-3.176204
4,O=[C]OC(O)(O)O,O=[C]OC(O)(O)O,O=COC(O)(O)O,-216.499044,92.684651,26.935163,30.962696,34.637667,37.592447,42.375430,...,1,36.001481,-4.835845,5.443029,5.881647,4.696512,3.708683,1.768296,0.144000,-3.234417
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2862,C1=C[CH]C=1,[C]1=CC=C1,C1=CC=C1,104.333500,57.818020,15.504000,19.920000,23.474000,26.261000,30.329000,...,2,51.773082,3.435468,-0.850889,-1.431243,-1.911856,-2.405821,-3.006968,-3.449835,-4.556360
2863,C=C=C1C=[C]C1,C=C=C1C=[C]C1,C=C=C1C=CC1,91.366874,50.394958,22.416919,29.489266,35.000461,39.185145,45.162263,...,1,66.366211,25.815240,0.258337,-1.155381,-2.024215,-2.373362,-2.314548,-2.090808,-2.168527
2864,[CH2]C1=CC#CC1,[CH2]C1=CC#CC1,CC1=CC#CC1,131.227000,72.956831,23.409000,30.017000,34.619000,39.828000,46.529000,...,3,32.271344,0.823844,-0.802362,-1.521882,-1.227900,-2.500041,-3.127836,-3.711522,-2.960733
2865,[CH2]C1=CC#CC1,C=C1[CH]C#CC1,C=C1CC#CC1,136.767000,72.435147,22.819000,29.017000,34.189000,38.448000,45.419000,...,3,26.731344,0.773844,-0.212362,-0.521882,-0.797900,-1.120041,-2.017836,-2.931522,-2.570733


In [94]:
# Per-source tally of the closed-shell thermo values, recomputed on the
# reloaded table.
HBI_correction_df["closed_shell_thermo_source"].value_counts()

GAV                                           1864
Thermo library: ../data/dong_pio_liang.py      876
Thermo library: ../data/pang.py                 99
Thermo library: ../data/johnson_g4.py           22
Thermo library: ../data/johnson_cbs-qb3.py       6
Name: closed_shell_thermo_source, dtype: int64

In [95]:
# Number of rows whose closed-shell thermo comes from one of these three
# libraries: every library in the per-source tally except johnson_g4.py.
# The count is computed from the table, not re-typed by hand from the
# value_counts() output, so it cannot go stale when the data change.
# NOTE(review): the reason johnson_g4.py is left out is not stated in the
# notebook -- confirm the intended grouping.
summed_sources = [
    "Thermo library: ../data/dong_pio_liang.py",
    "Thermo library: ../data/pang.py",
    "Thermo library: ../data/johnson_cbs-qb3.py",
]
int(HBI_correction_df["closed_shell_thermo_source"].isin(summed_sources).sum())

981