# Get collected radical data

In [1]:
import os
import sys

# Make the RMG-Py checkout and the repository root importable.
# The RMG-Py location can be overridden with the RMG_PY_PATH environment
# variable so the notebook is not tied to one user's home directory; the
# original hard-coded location is kept as the fallback.
sys.path.insert(0, os.environ.get("RMG_PY_PATH", "/home/gridsan/hwpang/Software/RMG-Py/"))
# ".." is inserted last so it ends up first on sys.path (needed for the `tree` package imports).
sys.path.insert(0, "..")

import random
import os
import time
import math
from copy import deepcopy
import pandas as pd
from joblib import Parallel, delayed
from tqdm import tqdm

from rmgpy.data.thermo import ThermoDatabase, ThermoLibrary, ThermoData, remove_thermo_data, add_thermo_data, NASA
from rmgpy.molecule import Molecule
from rmgpy.species import Species
from rmgpy import settings
from rmgpy import constants

from tree.utils import load_thermo_lib_by_path
from tree.parameters import Ts



# Extract radical data into DataFrame

In [2]:
# Thermo library files to harvest radicals from.
# The list order matters: these paths are later placed, in this order, at the
# front of thermo_database.library_order, and rows are collected in this order.
lib_paths = [
    "../data/dong_pio_liang.py",  # labelled CBS-QB3 when rows are built
    "../data/pang.py",  # labelled CBS-QB3 when rows are built
    "../data/johnson_cbs-qb3.py",  # labelled CBS-QB3 when rows are built
    "../data/johnson_g4.py",  # the only library labelled G4
]

In [3]:
# Build the thermo database: group-additivity trees first, then every
# library listed in lib_paths.

thermo_database = ThermoDatabase()
groups_dir = os.path.join(settings["database.directory"], "thermo", "groups")
thermo_database.load_groups(groups_dir)

for path in lib_paths:
    load_thermo_lib_by_path(path, thermo_database)
    # Presumably the loader registers `path` in library_order (TODO confirm);
    # take it out here so the libraries can be re-inserted at the front below.
    thermo_database.library_order.remove(path)

# Put our libraries first, in lib_paths order, ahead of anything already registered.
thermo_database.library_order = [*lib_paths, *thermo_database.library_order]

The thermodynamics library ../data/dong_pio_liang.py is loaded.
The thermodynamics library ../data/pang.py is loaded.
The thermodynamics library ../data/johnson_cbs-qb3.py is loaded.
The thermodynamics library ../data/johnson_g4.py is loaded.


In [4]:
# Harvest every mono-radical entry of the loaded libraries into column-wise
# lists, then assemble the DataFrame in one shot.

J_PER_CAL = 4.184  # divisor used for every J -> cal conversion below

# Column order here is the column order of the resulting DataFrame.
columns = {
    name: []
    for name in (
        "smiles",
        "H298 (kcal/mol)",
        "S298 (cal/mol/K)",
        "Sint298 (cal/mol/K)",
        "source",
        "level_of_theory",
        *(f"Cp{T} (cal/mol/K)" for T in Ts),
    )
}

for lib_path in lib_paths:
    source_name = os.path.basename(lib_path)
    # Only the G4 library is labelled differently; everything else is CBS-QB3.
    level_of_theory = "G4" if lib_path == "../data/johnson_g4.py" else "CBS-QB3"

    for entry in thermo_database.libraries[lib_path].entries.values():
        # Keep only species carrying exactly one unpaired electron in total.
        n_unpaired = sum(atom.radical_electrons for atom in entry.item.atoms)
        if n_unpaired != 1:
            continue

        # Work on copies so the database entries themselves are never modified.
        radical = deepcopy(entry.item)
        radical_data = deepcopy(entry.data)

        # Cp0 / CpInf are set from the molecule before any conversion to ThermoData.
        radical_data.Cp0 = (radical.calculate_cp0(), "J/(mol*K)")
        radical_data.CpInf = (radical.calculate_cpinf(), "J/(mol*K)")
        if not isinstance(radical_data, ThermoData):
            radical_data = radical_data.to_thermo_data()

        s298_si = radical_data.S298.value_si
        # Undo the symmetry correction: add R*ln(symmetry number) back onto S298.
        sint298_si = s298_si + constants.R * math.log(radical.get_symmetry_number())

        columns["smiles"].append(radical.to_smiles())
        columns["source"].append(source_name)
        columns["level_of_theory"].append(level_of_theory)
        columns["H298 (kcal/mol)"].append(radical_data.H298.value_si / 1000 / J_PER_CAL)
        columns["S298 (cal/mol/K)"].append(s298_si / J_PER_CAL)
        columns["Sint298 (cal/mol/K)"].append(sint298_si / J_PER_CAL)

        # Cp values are matched to Ts purely by position.
        # NOTE(review): assumes Cpdata is tabulated at exactly the temperatures in Ts - confirm.
        for T, cp_si in zip(Ts, radical_data.Cpdata.value_si):
            columns[f"Cp{T} (cal/mol/K)"].append(cp_si / J_PER_CAL)

radical_df = pd.DataFrame(columns)
radical_df

Unnamed: 0,smiles,H298 (kcal/mol),S298 (cal/mol/K),Sint298 (cal/mol/K),source,level_of_theory,Cp300 (cal/mol/K),Cp400 (cal/mol/K),Cp500 (cal/mol/K),Cp600 (cal/mol/K),Cp800 (cal/mol/K),Cp1000 (cal/mol/K),Cp1500 (cal/mol/K)
0,CC(=O)[C]1COOCO1,-67.914217,93.370863,95.554033,dong_pio_liang.py,CBS-QB3,32.548179,40.493247,47.732998,54.091191,63.903914,70.305379,79.373213
1,[CH]=C(O)CC,16.534713,79.835564,82.018734,dong_pio_liang.py,CBS-QB3,25.052968,30.762981,35.295645,39.127141,45.144168,49.416420,55.364041
2,C#C[CH2],83.953327,60.670664,62.048090,dong_pio_liang.py,CBS-QB3,14.675033,17.030238,18.850812,20.307804,22.661429,24.424874,27.186521
3,[CH]=C(C)CC,51.656190,82.347307,86.713646,dong_pio_liang.py,CBS-QB3,26.179714,31.786750,36.933842,41.581561,49.319870,55.081112,63.758339
4,CC(=O)OC1[CH]COO1,-70.123462,99.399211,101.582380,dong_pio_liang.py,CBS-QB3,33.514281,41.924448,49.154668,55.274645,64.701788,71.157404,79.323295
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2266,CC(COCO[O])OO,-71.481951,111.808461,112.614204,johnson_g4.py,G4,44.743185,53.264718,60.393729,66.299471,75.064848,80.699602,86.994426
2267,CC([CH]OCOO)OO,-66.073649,117.174198,117.979941,johnson_g4.py,G4,46.463535,55.149610,62.463469,68.518406,77.481097,83.189611,89.235066
2268,O=[C]C(O)COO,-61.935718,96.549519,95.172093,johnson_g4.py,G4,35.044633,38.940622,42.242565,45.021796,49.274397,52.176869,56.000299
2269,CC(COO)O[O],-34.400949,89.531816,90.337559,johnson_g4.py,G4,37.573519,46.238927,51.859398,55.588213,60.922046,64.218585,67.748833


In [5]:
# Order rows from lowest to highest H298. The de-duplication that follows keeps
# the first row per SMILES, so this ordering decides which duplicate survives.
# mergesort is a stable sort: rows with identical H298 keep the order in which
# they were collected (lib_paths order) instead of the arbitrary tie order the
# default quicksort may produce, which makes the saved table reproducible.
radical_df = radical_df.sort_values("H298 (kcal/mol)", ascending=True, kind="mergesort")
radical_df

Unnamed: 0,smiles,H298 (kcal/mol),S298 (cal/mol/K),Sint298 (cal/mol/K),source,level_of_theory,Cp300 (cal/mol/K),Cp400 (cal/mol/K),Cp500 (cal/mol/K),Cp600 (cal/mol/K),Cp800 (cal/mol/K),Cp1000 (cal/mol/K),Cp1500 (cal/mol/K)
539,[O]C(=O)OC(O)(O)O,-223.514126,93.816804,95.999974,dong_pio_liang.py,CBS-QB3,33.926878,38.387673,41.508496,44.062390,47.779730,50.089328,52.510168
567,[O]C(O)(O)OC(=O)O,-223.423390,92.781436,92.781436,dong_pio_liang.py,CBS-QB3,32.531179,37.569659,41.498443,44.710460,49.355989,52.184758,54.821371
538,O=C(O)O[C](O)O,-187.322869,88.139642,89.517068,dong_pio_liang.py,CBS-QB3,28.973242,34.553073,38.746721,41.655671,45.237515,47.423507,49.461069
1484,CC(=O)OC(OO)C(=O)C(O)O[O],-185.660577,133.205064,132.633380,dong_pio_liang.py,CBS-QB3,56.569703,66.028226,73.603744,79.772844,88.705511,94.174908,99.503777
77,O=[C]OC(O)(O)O,-180.497563,87.848805,90.031975,dong_pio_liang.py,CBS-QB3,32.378191,36.844343,39.334179,41.301130,44.143726,45.893560,47.744589
...,...,...,...,...,...,...,...,...,...,...,...,...,...
707,C=C=C1[CH]C1=C,148.970420,78.606070,81.360924,dong_pio_liang.py,CBS-QB3,25.185446,30.771706,34.989720,38.456637,43.926501,47.843872,53.411105
658,[CH]=C1C=C1,152.166735,66.134536,67.511963,dong_pio_liang.py,CBS-QB3,16.390064,19.702535,22.399354,24.484680,27.637313,29.841300,32.891319
1222,C1=C[CH]C=1,156.106582,64.008341,65.385768,dong_pio_liang.py,CBS-QB3,14.653111,18.488757,21.562144,23.855179,27.322032,29.749165,33.017640
1664,C=C=C1C=[C]C1,157.733085,76.210198,77.587625,dong_pio_liang.py,CBS-QB3,22.675255,28.333884,32.976246,36.811783,42.847715,47.149236,53.155423


In [6]:
# Keep a single row per SMILES string. Rows were sorted by ascending H298
# beforehand, so "first" means the lowest-enthalpy entry for each SMILES.
# The index is then renumbered from 0 and the old index discarded.
radical_df = (
    radical_df
    .drop_duplicates(subset=["smiles"], keep="first")
    .reset_index(drop=True)
)
radical_df

Unnamed: 0,smiles,H298 (kcal/mol),S298 (cal/mol/K),Sint298 (cal/mol/K),source,level_of_theory,Cp300 (cal/mol/K),Cp400 (cal/mol/K),Cp500 (cal/mol/K),Cp600 (cal/mol/K),Cp800 (cal/mol/K),Cp1000 (cal/mol/K),Cp1500 (cal/mol/K)
0,[O]C(=O)OC(O)(O)O,-223.514126,93.816804,95.999974,dong_pio_liang.py,CBS-QB3,33.926878,38.387673,41.508496,44.062390,47.779730,50.089328,52.510168
1,[O]C(O)(O)OC(=O)O,-223.423390,92.781436,92.781436,dong_pio_liang.py,CBS-QB3,32.531179,37.569659,41.498443,44.710460,49.355989,52.184758,54.821371
2,O=C(O)O[C](O)O,-187.322869,88.139642,89.517068,dong_pio_liang.py,CBS-QB3,28.973242,34.553073,38.746721,41.655671,45.237515,47.423507,49.461069
3,CC(=O)OC(OO)C(=O)C(O)O[O],-185.660577,133.205064,132.633380,dong_pio_liang.py,CBS-QB3,56.569703,66.028226,73.603744,79.772844,88.705511,94.174908,99.503777
4,O=[C]OC(O)(O)O,-180.497563,87.848805,90.031975,dong_pio_liang.py,CBS-QB3,32.378191,36.844343,39.334179,41.301130,44.143726,45.893560,47.744589
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2231,C=C=C1[CH]C1=C,148.970420,78.606070,81.360924,dong_pio_liang.py,CBS-QB3,25.185446,30.771706,34.989720,38.456637,43.926501,47.843872,53.411105
2232,[CH]=C1C=C1,152.166735,66.134536,67.511963,dong_pio_liang.py,CBS-QB3,16.390064,19.702535,22.399354,24.484680,27.637313,29.841300,32.891319
2233,C1=C[CH]C=1,156.106582,64.008341,65.385768,dong_pio_liang.py,CBS-QB3,14.653111,18.488757,21.562144,23.855179,27.322032,29.749165,33.017640
2234,C=C=C1C=[C]C1,157.733085,76.210198,77.587625,dong_pio_liang.py,CBS-QB3,22.675255,28.333884,32.976246,36.811783,42.847715,47.149236,53.155423


In [7]:
# Persist the de-duplicated radical table; index=False keeps the row index out of the file.
csv_path = "../data/radical_data.csv"
radical_df.to_csv(csv_path, index=False)