# Get corresponding closed shells and data

In [14]:
import sys
sys.path.insert(0,"/home/gridsan/hwpang/Software/RMG-Py/")
sys.path.insert(0,"..")

import random
import os
import time
import math
from copy import deepcopy
import pandas as pd
from joblib import Parallel, delayed
from tqdm import tqdm

from rmgpy.data.thermo import ThermoDatabase, ThermoLibrary, ThermoData, remove_thermo_data, add_thermo_data, NASA
from rmgpy.molecule import Molecule
from rmgpy.species import Species
from rmgpy import settings
from rmgpy import constants

from tree.utils import load_thermo_lib_by_path, generate_thermo
from tree.parameters import Ts

# Load resonance radicals

In [15]:
# Read the resonance radical table; its "resonance_radical_smiles" column is the
# input for the closed-shell generation below.
resonance_radical_df = pd.read_csv("../data/resonance_radicals.csv")

# Generate closed shell structures

In [16]:
# Start the closed-shell table from the radical SMILES column only.
# Take an explicit copy: new columns are assigned to this frame later, and
# assigning into a bare column selection of resonance_radical_df makes pandas
# emit SettingWithCopyWarning because it cannot tell whether the write targets
# a view or a copy.
closed_shell_df = resonance_radical_df[["resonance_radical_smiles"]].copy()

In [17]:
def generate_closed_shell_smi(smi):
    """Return the SMILES of the closed-shell counterpart of a radical.

    Args:
        smi: SMILES string of the (radical) species.

    Returns:
        The SMILES string produced after the molecule's radicals have been
        saturated.
    """
    molecule = Molecule().from_smiles(smi)
    molecule.saturate_radicals()
    # Sort the atom list in place before writing the SMILES.
    # NOTE(review): presumably this keeps the output ordering reproducible — confirm.
    molecule.atoms.sort()
    return molecule.to_smiles()

generate_closed_shell_smi("CC1OC1OC[O]")

'CC1OC1OCO'

In [18]:
# Add a column holding the radical-saturated (closed-shell) SMILES for every
# resonance radical SMILES, one row at a time.
closed_shell_df["closed_shell_smiles"] = closed_shell_df["resonance_radical_smiles"].apply(generate_closed_shell_smi)
# Bare expression: display the resulting frame as the cell output.
closed_shell_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,resonance_radical_smiles,closed_shell_smiles
0,[O]C(=O)OC(O)(O)O,O=C(O)OC(O)(O)O
1,[O]C(O)(O)OC(=O)O,O=C(O)OC(O)(O)O
2,O=C(O)O[C](O)O,O=C(O)OC(O)O
3,CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)OO
4,O=[C]OC(O)(O)O,O=COC(O)(O)O
...,...,...
2804,[C]1=CC=C1,C1=CC=C1
2805,C=C=C1C=[C]C1,C=C=C1C=CC1
2806,[CH2]C1=CC#CC1,CC1=CC#CC1
2807,C=C1[CH]C#CC1,C=C1CC#CC1


In [19]:
# Save the radical / closed-shell SMILES pairs; index=False omits the row index column.
closed_shell_df.to_csv("../data/closed_shells.csv", index=False)

# Generate closed shell thermo

In [20]:
# Thermo library files to load into the thermo database.
# The order matters: this list is placed, in this order, at the front of
# thermo_database.library_order when the database is set up.
lib_paths = [
    "../data/dong_pio_liang.py",
    "../data/pang.py",
    "../data/johnson_cbs-qb3.py",
    "../data/johnson_g4.py",
]

In [21]:
# Load the thermo database: the group-additivity groups from the database
# directory configured in rmgpy settings, plus every library in lib_paths.

thermo_database = ThermoDatabase()
thermo_database.load_groups(os.path.join(settings["database.directory"], "thermo", "groups"))
for path in lib_paths:
    load_thermo_lib_by_path(path, thermo_database)
    # NOTE(review): presumably load_thermo_lib_by_path registers `path` in
    # library_order; it is removed here so the list can be re-inserted at the
    # front below without duplicates — confirm against tree.utils.
    thermo_database.library_order.remove(path)
# Put the libraries from lib_paths first in library_order, in the order listed.
thermo_database.library_order = lib_paths + thermo_database.library_order

The thermodynamics library ../data/dong_pio_liang.py is loaded.
The thermodynamics library ../data/pang.py is loaded.
The thermodynamics library ../data/johnson_cbs-qb3.py is loaded.
The thermodynamics library ../data/johnson_g4.py is loaded.


In [None]:
# takes ~1 min 30 sec
# Generate thermo for every closed-shell SMILES sequentially, with a tqdm
# progress bar. The results are kept in row order and are indexed positionally
# (0..4) when the columns are filled in afterwards.
# NOTE(review): the meaning of resonance=False is defined in tree.utils.generate_thermo — confirm there.
closed_shell_thermos = [generate_thermo(thermo_database, smi, resonance=False) for smi in tqdm(closed_shell_df["closed_shell_smiles"])]


 33%|███▎      | 915/2809 [00:12<00:19, 97.27it/s]

In [None]:
# Unpack each thermo result into columns of closed_shell_df.
# NOTE(review): units in the column names are assumed to match what
# generate_thermo returns — confirm against tree.utils.
# Element 0 -> H298 column, element 1 -> S298 column.
closed_shell_df["closed_shell_H298 (kcal/mol)"] = [thermo[0] for thermo in closed_shell_thermos]
closed_shell_df["closed_shell_S298 (cal/mol/K)"] = [thermo[1] for thermo in closed_shell_thermos]
# Element 2 is a sequence indexed in parallel with Ts: one Cp column per temperature.
for i, T in enumerate(Ts):
    closed_shell_df[f"closed_shell_Cp{T} (cal/mol/K)"] = [thermo[2][i] for thermo in closed_shell_thermos]
# Element 3 -> Sint298 column, element 4 -> thermo comment column.
closed_shell_df["closed_shell_Sint298 (cal/mol/K)"] = [thermo[3] for thermo in closed_shell_thermos]
closed_shell_df["closed_shell_thermo_comment"] = [thermo[4] for thermo in closed_shell_thermos]
# Bare expression: display the resulting frame as the cell output.
closed_shell_df

In [None]:
def thermo_source(comment):
    """Map a thermo comment to a short source label.

    Args:
        comment: Thermo comment string.

    Returns:
        "GAV" if the comment contains the phrase
        "Thermo group additivity estimation"; otherwise the comment itself,
        unchanged.
    """
    from_group_additivity = "Thermo group additivity estimation" in comment
    return "GAV" if from_group_additivity else comment
# Derive a source label for every row from its thermo comment.
closed_shell_df["closed_shell_thermo_source"] = closed_shell_df["closed_shell_thermo_comment"].apply(thermo_source)
# Bare expression: display the resulting frame as the cell output.
closed_shell_df

In [None]:
# Count how many rows fall under each thermo source label.
closed_shell_df["closed_shell_thermo_source"].value_counts()

In [None]:
# Save the closed-shell table with its thermo columns; index=False omits the row index column.
closed_shell_df.to_csv("../data/closed_shell_data.csv", index=False)