# Get resonance radicals

In [1]:
import sys
sys.path.insert(0,"/home/gridsan/hwpang/Software/RMG-Py/")
sys.path.insert(0, "..")

import random
import os
import time
import math
from copy import deepcopy
import pandas as pd
from joblib import Parallel, delayed
from tqdm import tqdm

from rdkit.Chem.Descriptors import ExactMolWt, NumRadicalElectrons
from rdkit.Chem.rdMolDescriptors import CalcNumRings, CalcNumHBD, CalcNumHeavyAtoms, CalcNumRotatableBonds
from rdkit.Chem import GetPeriodicTable

from rdmc.mol import RDKitMol

from rmgpy.data.thermo import ThermoDatabase, ThermoLibrary, ThermoData, remove_thermo_data, add_thermo_data, NASA
from rmgpy.molecule import Molecule
from rmgpy.species import Species
from rmgpy import settings
from rmgpy import constants

from tree.utils import load_thermo_lib_by_path, generate_thermo
from tree.parameters import Ts



# Generate resonance structures

In [2]:
# Load the radical table: a SMILES column plus H298 / S298 / Cp columns.
radical_df = pd.read_csv("../data/radical_data.csv")
radical_df

Unnamed: 0,smiles,H298 (kcal/mol),S298 (cal/mol/K),Sint298 (cal/mol/K),source,level_of_theory,Cp300 (cal/mol/K),Cp400 (cal/mol/K),Cp500 (cal/mol/K),Cp600 (cal/mol/K),Cp800 (cal/mol/K),Cp1000 (cal/mol/K),Cp1500 (cal/mol/K)
0,[O]C(=O)OC(O)(O)O,-223.514126,93.816804,95.999974,dong_pio_liang.py,CBS-QB3,33.926878,38.387673,41.508496,44.062390,47.779730,50.089328,52.510168
1,[O]C(O)(O)OC(=O)O,-223.423390,92.781436,92.781436,dong_pio_liang.py,CBS-QB3,32.531179,37.569659,41.498443,44.710460,49.355989,52.184758,54.821371
2,O=C(O)O[C](O)O,-187.322869,88.139642,89.517068,dong_pio_liang.py,CBS-QB3,28.973242,34.553073,38.746721,41.655671,45.237515,47.423507,49.461069
3,CC(=O)OC(OO)C(=O)C(O)O[O],-185.660577,133.205064,132.633380,dong_pio_liang.py,CBS-QB3,56.569703,66.028226,73.603744,79.772844,88.705511,94.174908,99.503777
4,O=[C]OC(O)(O)O,-180.497563,87.848805,90.031975,dong_pio_liang.py,CBS-QB3,32.378191,36.844343,39.334179,41.301130,44.143726,45.893560,47.744589
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2230,C=C=C1[CH]C1=C,148.970420,78.606070,81.360924,dong_pio_liang.py,CBS-QB3,25.185446,30.771706,34.989720,38.456637,43.926501,47.843872,53.411105
2231,[CH]=C1C=C1,152.166735,66.134536,67.511963,dong_pio_liang.py,CBS-QB3,16.390064,19.702535,22.399354,24.484680,27.637313,29.841300,32.891319
2232,C1=C[CH]C=1,156.106582,64.008341,65.385768,dong_pio_liang.py,CBS-QB3,14.653111,18.488757,21.562144,23.855179,27.322032,29.749165,33.017640
2233,C=C=C1C=[C]C1,157.733085,76.210198,77.587625,dong_pio_liang.py,CBS-QB3,22.675255,28.333884,32.976246,36.811783,42.847715,47.149236,53.155423


In [3]:
def generate_radical_resonance_smis(smi):
    """Return SMILES of the monoradical resonance structures of ``smi``.

    The molecule is built from ``smi``, its resonance structures are
    enumerated, and only structures whose atoms carry exactly one radical
    electron in total are kept. The atom list of each kept structure is
    sorted before its SMILES is written (presumably to make the SMILES
    independent of the original atom order -- TODO confirm).

    Parameters
    ----------
    smi : str
        SMILES string of the radical.

    Returns
    -------
    list of str
        One SMILES per kept resonance structure, in enumeration order.
    """
    parent = Molecule().from_smiles(smi)
    resonance_smiles = []
    for structure in parent.generate_resonance_structures():
        unpaired_electrons = sum(atom.radical_electrons for atom in structure.atoms)
        if unpaired_electrons != 1:
            # Skip closed-shell and multi-radical resonance forms.
            continue
        structure.atoms.sort()
        resonance_smiles.append(structure.to_smiles())
    return resonance_smiles
    
# Quick check on a single radical; the recorded output lists two resonance forms.
generate_radical_resonance_smis("C[CH]C(=O)OCO")

['C[CH]C(=O)OCO', 'CC=C([O])OCO']

In [4]:
# Slowest step of this section (about 15 s for ~2.2k radicals in the recorded run).
# Produces one list of resonance SMILES per input radical, in input order.
resonance_smiss = [
    generate_radical_resonance_smis(radical_smi)
    for radical_smi in tqdm(radical_df["smiles"])
]

100%|██████████| 2235/2235 [00:14<00:00, 158.21it/s]


In [5]:
# Column-wise accumulator for (parent radical, resonance form) pairs.
temp_dict = {
    column: [] for column in ("radical_smiles", "resonance_radical_smiles")
}

In [6]:
# Flatten the per-radical resonance lists into one row per
# (parent radical, resonance form) pair.
#
# The accumulator is rebuilt inside this cell so that re-running the cell does
# not append the same rows a second time.
temp_dict = {
    "radical_smiles": [],
    "resonance_radical_smiles": [],
}

# zip() would silently truncate if the two inputs came from different runs.
assert len(resonance_smiss) == len(radical_df), (
    "resonance_smiss is out of sync with radical_df; re-run the resonance generation cell"
)

for smi, resonance_smis in zip(radical_df["smiles"], resonance_smiss):
    # Repeat the parent SMILES once for each of its resonance forms.
    temp_dict["radical_smiles"].extend([smi] * len(resonance_smis))
    temp_dict["resonance_radical_smiles"].extend(resonance_smis)

resonance_radical_df = pd.DataFrame(temp_dict)
resonance_radical_df

Unnamed: 0,radical_smiles,resonance_radical_smiles
0,[O]C(=O)OC(O)(O)O,[O]C(=O)OC(O)(O)O
1,[O]C(O)(O)OC(=O)O,[O]C(O)(O)OC(=O)O
2,O=C(O)O[C](O)O,O=C(O)O[C](O)O
3,CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)O[O]
4,O=[C]OC(O)(O)O,O=[C]OC(O)(O)O
...,...,...
2858,C1=C[CH]C=1,[C]1=CC=C1
2859,C=C=C1C=[C]C1,C=C=C1C=[C]C1
2860,[CH2]C1=CC#CC1,[CH2]C1=CC#CC1
2861,[CH2]C1=CC#CC1,C=C1[CH]C#CC1


In [7]:
# A resonance form can be reached from several parent radicals; keep only the
# row of the first parent that produced it, then renumber the rows.
resonance_radical_df = (
    resonance_radical_df
    .drop_duplicates(subset=["resonance_radical_smiles"], keep="first")
    .reset_index(drop=True)
)
resonance_radical_df

Unnamed: 0,radical_smiles,resonance_radical_smiles
0,[O]C(=O)OC(O)(O)O,[O]C(=O)OC(O)(O)O
1,[O]C(O)(O)OC(=O)O,[O]C(O)(O)OC(=O)O
2,O=C(O)O[C](O)O,O=C(O)O[C](O)O
3,CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)O[O]
4,O=[C]OC(O)(O)O,O=[C]OC(O)(O)O
...,...,...
2800,C1=C[CH]C=1,[C]1=CC=C1
2801,C=C=C1C=[C]C1,C=C=C1C=[C]C1
2802,[CH2]C1=CC#CC1,[CH2]C1=CC#CC1
2803,[CH2]C1=CC#CC1,C=C1[CH]C#CC1


In [8]:
# Checkpoint the (parent radical, resonance form) table to disk, without the index column.
resonance_radical_df.to_csv("../data/resonance_radicals.csv", index=False)

# Get descriptors

In [9]:
# Reload the resonance-radical table from its CSV checkpoint.
resonance_radical_df = pd.read_csv("../data/resonance_radicals.csv")

In [10]:
def get_descriptors(smi):
    """Compute simple descriptors for a radical given as SMILES.

    Parameters
    ----------
    smi : str
        SMILES string of a species with at least one radical center.

    Returns
    -------
    tuple
        ``(element_symbols, exact_mol_wt, num_rings, num_heavy_atoms,
        num_rotatable_bonds, radical_atom_symbol)``. ``element_symbols`` is
        the set of symbols returned by ``mol.GetElementSymbols()``.
        ``radical_atom_symbol`` is the symbol of the first atom, in
        ``mol.GetAtoms()`` order, that carries radical electrons; any
        further radical centers are ignored.

    Raises
    ------
    IndexError
        If no atom carries radical electrons. The exception type matches the
        earlier list-indexing implementation, but the message now names the
        offending SMILES.
    """
    mol = RDKitMol.FromSmiles(smi)

    # Stop at the first radical center instead of collecting every one.
    radical_atom = next(
        (atom for atom in mol.GetAtoms() if atom.GetNumRadicalElectrons() != 0),
        None,
    )
    if radical_atom is None:
        raise IndexError(f"no radical atom found in SMILES {smi!r}")

    return (
        set(mol.GetElementSymbols()),
        ExactMolWt(mol._mol),
        CalcNumRings(mol._mol),
        CalcNumHeavyAtoms(mol._mol),
        CalcNumRotatableBonds(mol._mol),
        radical_atom.GetSymbol(),
    )

# Sanity check on the methyl radical.
get_descriptors("[CH3]")

({'C', 'H'}, 15.023475095999999, 0, 1, 0, 'C')

In [11]:
# Compute one descriptor tuple per resonance SMILES across 4 worker processes.
descriptor_tasks = (
    delayed(get_descriptors)(resonance_smi)
    for resonance_smi in tqdm(resonance_radical_df["resonance_radical_smiles"])
)
descriptorss = Parallel(backend="multiprocessing", n_jobs=4)(descriptor_tasks)




100%|██████████| 2805/2805 [00:00<00:00, 4140.09it/s]


In [12]:
# First field of every descriptor tuple: the set of element symbols in the molecule.
atom_symbolss = [element_symbols for element_symbols, *_ in descriptorss]

In [13]:
# Union of the element symbols found in any molecule of the data set.
all_atom_symbols = set().union(*atom_symbolss)
all_atom_symbols

{'C', 'H', 'N', 'O'}

In [14]:
# Order the element symbols by atomic number so the has_<element> columns
# come out in periodic-table order.
PeriodicTable = GetPeriodicTable()
sorted_all_atom_symbols = sorted(
    all_atom_symbols,
    key=lambda symbol: PeriodicTable.GetAtomicNumber(symbol),
)
sorted_all_atom_symbols

['H', 'C', 'N', 'O']

In [15]:
# Add one boolean column per element: does the molecule contain that element?
for element in sorted_all_atom_symbols:
    contains_element = [element in symbols for symbols in atom_symbolss]
    resonance_radical_df[f"has_{element}"] = contains_element
resonance_radical_df

Unnamed: 0,radical_smiles,resonance_radical_smiles,has_H,has_C,has_N,has_O
0,[O]C(=O)OC(O)(O)O,[O]C(=O)OC(O)(O)O,True,True,False,True
1,[O]C(O)(O)OC(=O)O,[O]C(O)(O)OC(=O)O,True,True,False,True
2,O=C(O)O[C](O)O,O=C(O)O[C](O)O,True,True,False,True
3,CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)O[O],True,True,False,True
4,O=[C]OC(O)(O)O,O=[C]OC(O)(O)O,True,True,False,True
...,...,...,...,...,...,...
2800,C1=C[CH]C=1,[C]1=CC=C1,True,True,False,False
2801,C=C=C1C=[C]C1,C=C=C1C=[C]C1,True,True,False,False
2802,[CH2]C1=CC#CC1,[CH2]C1=CC#CC1,True,True,False,False
2803,[CH2]C1=CC#CC1,C=C1[CH]C#CC1,True,True,False,False


In [16]:
# Second descriptor field: the value returned by ExactMolWt.
resonance_radical_df["Mw (g/mol)"] = [
    mol_weight for _, mol_weight, *_ in descriptorss
]

In [17]:
# Third descriptor field: ring count.
resonance_radical_df["num_rings"] = [
    ring_count for _, _, ring_count, *_ in descriptorss
]

In [18]:
# Fourth descriptor field: heavy-atom count.
resonance_radical_df["num_heavy_atoms"] = [
    heavy_atom_count for _, _, _, heavy_atom_count, *_ in descriptorss
]

In [19]:
# Fifth descriptor field: rotatable-bond count.
resonance_radical_df["num_rotatable_bonds"] = [
    rotatable_bond_count for _, _, _, _, rotatable_bond_count, *_ in descriptorss
]

In [20]:
# Sixth descriptor field: element symbol of the radical-bearing atom.
resonance_radical_df["radical_atom_type"] = [
    radical_symbol for _, _, _, _, _, radical_symbol, *_ in descriptorss
]

In [21]:
# Display the resonance-radical table with the descriptor columns attached.
resonance_radical_df

Unnamed: 0,radical_smiles,resonance_radical_smiles,has_H,has_C,has_N,has_O,Mw (g/mol),num_rings,num_heavy_atoms,num_rotatable_bonds,radical_atom_type
0,[O]C(=O)OC(O)(O)O,[O]C(=O)OC(O)(O)O,True,True,False,True,122.992963,0,8,4,O
1,[O]C(O)(O)OC(=O)O,[O]C(O)(O)OC(=O)O,True,True,False,True,122.992963,0,8,3,O
2,O=C(O)O[C](O)O,O=C(O)O[C](O)O,True,True,False,True,106.998048,0,7,3,C
3,CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)O[O],True,True,False,True,195.014092,0,13,8,O
4,O=[C]OC(O)(O)O,O=[C]OC(O)(O)O,True,True,False,True,106.998048,0,7,5,C
...,...,...,...,...,...,...,...,...,...,...,...
2800,C1=C[CH]C=1,[C]1=CC=C1,True,True,False,False,51.023475,1,4,0,C
2801,C=C=C1C=[C]C1,C=C=C1C=[C]C1,True,True,False,False,77.039125,1,6,0,C
2802,[CH2]C1=CC#CC1,[CH2]C1=CC#CC1,True,True,False,False,77.039125,1,6,1,C
2803,[CH2]C1=CC#CC1,C=C1[CH]C#CC1,True,True,False,False,77.039125,1,6,0,C


In [22]:
# Overwrite the resonance-radical CSV with the current table (index column omitted).
resonance_radical_df.to_csv("../data/resonance_radicals.csv", index=False)

# Drop duplicates in radical data

In [23]:
# Read the full radical table again from disk as the starting point for filtering.
radical_data_df = pd.read_csv("../data/radical_data.csv")
radical_data_df

Unnamed: 0,smiles,H298 (kcal/mol),S298 (cal/mol/K),Sint298 (cal/mol/K),source,level_of_theory,Cp300 (cal/mol/K),Cp400 (cal/mol/K),Cp500 (cal/mol/K),Cp600 (cal/mol/K),Cp800 (cal/mol/K),Cp1000 (cal/mol/K),Cp1500 (cal/mol/K)
0,[O]C(=O)OC(O)(O)O,-223.514126,93.816804,95.999974,dong_pio_liang.py,CBS-QB3,33.926878,38.387673,41.508496,44.062390,47.779730,50.089328,52.510168
1,[O]C(O)(O)OC(=O)O,-223.423390,92.781436,92.781436,dong_pio_liang.py,CBS-QB3,32.531179,37.569659,41.498443,44.710460,49.355989,52.184758,54.821371
2,O=C(O)O[C](O)O,-187.322869,88.139642,89.517068,dong_pio_liang.py,CBS-QB3,28.973242,34.553073,38.746721,41.655671,45.237515,47.423507,49.461069
3,CC(=O)OC(OO)C(=O)C(O)O[O],-185.660577,133.205064,132.633380,dong_pio_liang.py,CBS-QB3,56.569703,66.028226,73.603744,79.772844,88.705511,94.174908,99.503777
4,O=[C]OC(O)(O)O,-180.497563,87.848805,90.031975,dong_pio_liang.py,CBS-QB3,32.378191,36.844343,39.334179,41.301130,44.143726,45.893560,47.744589
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2230,C=C=C1[CH]C1=C,148.970420,78.606070,81.360924,dong_pio_liang.py,CBS-QB3,25.185446,30.771706,34.989720,38.456637,43.926501,47.843872,53.411105
2231,[CH]=C1C=C1,152.166735,66.134536,67.511963,dong_pio_liang.py,CBS-QB3,16.390064,19.702535,22.399354,24.484680,27.637313,29.841300,32.891319
2232,C1=C[CH]C=1,156.106582,64.008341,65.385768,dong_pio_liang.py,CBS-QB3,14.653111,18.488757,21.562144,23.855179,27.322032,29.749165,33.017640
2233,C=C=C1C=[C]C1,157.733085,76.210198,77.587625,dong_pio_liang.py,CBS-QB3,22.675255,28.333884,32.976246,36.811783,42.847715,47.149236,53.155423


In [24]:
# Reload the resonance-radical table from disk.
resonance_radical_df = pd.read_csv("../data/resonance_radicals.csv")
resonance_radical_df

Unnamed: 0,radical_smiles,resonance_radical_smiles,has_H,has_C,has_N,has_O,Mw (g/mol),num_rings,num_heavy_atoms,num_rotatable_bonds,radical_atom_type
0,[O]C(=O)OC(O)(O)O,[O]C(=O)OC(O)(O)O,True,True,False,True,122.992963,0,8,4,O
1,[O]C(O)(O)OC(=O)O,[O]C(O)(O)OC(=O)O,True,True,False,True,122.992963,0,8,3,O
2,O=C(O)O[C](O)O,O=C(O)O[C](O)O,True,True,False,True,106.998048,0,7,3,C
3,CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)O[O],True,True,False,True,195.014092,0,13,8,O
4,O=[C]OC(O)(O)O,O=[C]OC(O)(O)O,True,True,False,True,106.998048,0,7,5,C
...,...,...,...,...,...,...,...,...,...,...,...
2800,C1=C[CH]C=1,[C]1=CC=C1,True,True,False,False,51.023475,1,4,0,C
2801,C=C=C1C=[C]C1,C=C=C1C=[C]C1,True,True,False,False,77.039125,1,6,0,C
2802,[CH2]C1=CC#CC1,[CH2]C1=CC#CC1,True,True,False,False,77.039125,1,6,1,C
2803,[CH2]C1=CC#CC1,C=C1[CH]C#CC1,True,True,False,False,77.039125,1,6,0,C


In [25]:
# Keep only radicals that appear as a parent ("radical_smiles") in the
# resonance-radical table; radicals with no row there are dropped.
is_listed_parent = radical_data_df["smiles"].isin(resonance_radical_df["radical_smiles"])
radical_data_df = radical_data_df[is_listed_parent]
radical_data_df

Unnamed: 0,smiles,H298 (kcal/mol),S298 (cal/mol/K),Sint298 (cal/mol/K),source,level_of_theory,Cp300 (cal/mol/K),Cp400 (cal/mol/K),Cp500 (cal/mol/K),Cp600 (cal/mol/K),Cp800 (cal/mol/K),Cp1000 (cal/mol/K),Cp1500 (cal/mol/K)
0,[O]C(=O)OC(O)(O)O,-223.514126,93.816804,95.999974,dong_pio_liang.py,CBS-QB3,33.926878,38.387673,41.508496,44.062390,47.779730,50.089328,52.510168
1,[O]C(O)(O)OC(=O)O,-223.423390,92.781436,92.781436,dong_pio_liang.py,CBS-QB3,32.531179,37.569659,41.498443,44.710460,49.355989,52.184758,54.821371
2,O=C(O)O[C](O)O,-187.322869,88.139642,89.517068,dong_pio_liang.py,CBS-QB3,28.973242,34.553073,38.746721,41.655671,45.237515,47.423507,49.461069
3,CC(=O)OC(OO)C(=O)C(O)O[O],-185.660577,133.205064,132.633380,dong_pio_liang.py,CBS-QB3,56.569703,66.028226,73.603744,79.772844,88.705511,94.174908,99.503777
4,O=[C]OC(O)(O)O,-180.497563,87.848805,90.031975,dong_pio_liang.py,CBS-QB3,32.378191,36.844343,39.334179,41.301130,44.143726,45.893560,47.744589
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2230,C=C=C1[CH]C1=C,148.970420,78.606070,81.360924,dong_pio_liang.py,CBS-QB3,25.185446,30.771706,34.989720,38.456637,43.926501,47.843872,53.411105
2231,[CH]=C1C=C1,152.166735,66.134536,67.511963,dong_pio_liang.py,CBS-QB3,16.390064,19.702535,22.399354,24.484680,27.637313,29.841300,32.891319
2232,C1=C[CH]C=1,156.106582,64.008341,65.385768,dong_pio_liang.py,CBS-QB3,14.653111,18.488757,21.562144,23.855179,27.322032,29.749165,33.017640
2233,C=C=C1C=[C]C1,157.733085,76.210198,77.587625,dong_pio_liang.py,CBS-QB3,22.675255,28.333884,32.976246,36.811783,42.847715,47.149236,53.155423


In [26]:
# Renumber the rows consecutively after filtering; the old index is discarded.
radical_data_df = radical_data_df.reset_index(drop=True)
radical_data_df

Unnamed: 0,smiles,H298 (kcal/mol),S298 (cal/mol/K),Sint298 (cal/mol/K),source,level_of_theory,Cp300 (cal/mol/K),Cp400 (cal/mol/K),Cp500 (cal/mol/K),Cp600 (cal/mol/K),Cp800 (cal/mol/K),Cp1000 (cal/mol/K),Cp1500 (cal/mol/K)
0,[O]C(=O)OC(O)(O)O,-223.514126,93.816804,95.999974,dong_pio_liang.py,CBS-QB3,33.926878,38.387673,41.508496,44.062390,47.779730,50.089328,52.510168
1,[O]C(O)(O)OC(=O)O,-223.423390,92.781436,92.781436,dong_pio_liang.py,CBS-QB3,32.531179,37.569659,41.498443,44.710460,49.355989,52.184758,54.821371
2,O=C(O)O[C](O)O,-187.322869,88.139642,89.517068,dong_pio_liang.py,CBS-QB3,28.973242,34.553073,38.746721,41.655671,45.237515,47.423507,49.461069
3,CC(=O)OC(OO)C(=O)C(O)O[O],-185.660577,133.205064,132.633380,dong_pio_liang.py,CBS-QB3,56.569703,66.028226,73.603744,79.772844,88.705511,94.174908,99.503777
4,O=[C]OC(O)(O)O,-180.497563,87.848805,90.031975,dong_pio_liang.py,CBS-QB3,32.378191,36.844343,39.334179,41.301130,44.143726,45.893560,47.744589
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2205,C=C=C1[CH]C1=C,148.970420,78.606070,81.360924,dong_pio_liang.py,CBS-QB3,25.185446,30.771706,34.989720,38.456637,43.926501,47.843872,53.411105
2206,[CH]=C1C=C1,152.166735,66.134536,67.511963,dong_pio_liang.py,CBS-QB3,16.390064,19.702535,22.399354,24.484680,27.637313,29.841300,32.891319
2207,C1=C[CH]C=1,156.106582,64.008341,65.385768,dong_pio_liang.py,CBS-QB3,14.653111,18.488757,21.562144,23.855179,27.322032,29.749165,33.017640
2208,C=C=C1C=[C]C1,157.733085,76.210198,77.587625,dong_pio_liang.py,CBS-QB3,22.675255,28.333884,32.976246,36.811783,42.847715,47.149236,53.155423


In [27]:
# Write the filtered radical table to a new CSV (index column omitted).
radical_data_df.to_csv("../data/radical_data_no_dup.csv", index=False)