In [87]:
import string

import numpy as np

import pandas as pd

from joblib import Parallel, delayed
from tqdm import tqdm

from rdmc.mol import RDKitMol
from rdkit.Chem.Descriptors import ExactMolWt, NumRadicalElectrons
from rdkit.Chem.rdMolDescriptors import CalcNumRings, CalcNumHBD, CalcNumHeavyAtoms, CalcNumRotatableBonds
from rdkit.Chem import GetPeriodicTable

import matplotlib.pyplot as plt
import matplotlib
# Use STIX fonts for both math text and regular text in all figures.
matplotlib.rcParams.update({
    'mathtext.fontset': 'stix',
    'font.family': 'STIXGeneral',
})

# Get descriptors

In [19]:
# Read the HBI correction table from its relative path and display it.
HBI_correction_df = pd.read_csv(filepath_or_buffer="../data/hbi.csv")
HBI_correction_df

Unnamed: 0,radical_smiles,radical_resonance_smiles,closed_shell_smiles,closed_shell_H298 (kcal/mol),closed_shell_S298 (cal/mol/K),closed_shell_Cp300 (cal/mol/K),closed_shell_Cp400 (cal/mol/K),closed_shell_Cp500 (cal/mol/K),closed_shell_Cp600 (cal/mol/K),closed_shell_Cp800 (cal/mol/K),...,radical_num_resonance,HBI_H298 (kcal/mol),HBI_Sint298 (cal/mol/K),HBI_Cp300 (cal/mol/K),HBI_Cp400 (cal/mol/K),HBI_Cp500 (cal/mol/K),HBI_Cp600 (cal/mol/K),HBI_Cp800 (cal/mol/K),HBI_Cp1000 (cal/mol/K),HBI_Cp1500 (cal/mol/K)
0,[O]C(=O)OC(O)(O)O,[O]C(=O)OC(O)(O)O,O=C(O)OC(O)(O)O,-272.617591,98.406448,31.562314,36.827897,41.095602,44.234417,48.821415,...,1,49.103465,-4.589644,2.364565,1.559776,0.412894,-0.172027,-1.041685,-1.683176,-3.705453
1,[O]C(O)(O)OC(=O)O,[O]C(O)(O)OC(=O)O,O=C(O)OC(O)(O)O,-272.617591,98.406448,31.562314,36.827897,41.095602,44.234417,48.821415,...,1,49.194200,-7.808181,0.968866,0.741762,0.402841,0.476043,0.534574,0.412253,-1.394250
2,O=C(O)O[C](O)O,O=C(O)O[C](O)O,O=C(O)OC(O)O,-231.363843,94.127419,27.214474,31.965583,36.032333,39.309560,44.059044,...,1,44.040974,-4.610350,1.758768,2.587490,2.714389,2.346110,1.178471,0.282207,-0.997631
3,CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)O[O],CC(=O)OC(OO)C(=O)C(O)OO,-234.219312,145.598548,52.706616,62.438776,71.442505,78.070363,87.443365,...,1,48.558735,-12.393484,3.863088,3.589450,2.161239,1.702481,1.262146,-0.928170,-3.176204
4,O=[C]OC(O)(O)O,O=[C]OC(O)(O)O,O=COC(O)(O)O,-216.499044,92.684651,26.935163,30.962696,34.637667,37.592447,42.375430,...,1,36.001481,-4.835845,5.443029,5.881647,4.696512,3.708683,1.768296,0.144000,-3.234417
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2862,C1=C[CH]C=1,[C]1=CC=C1,C1=CC=C1,104.333500,57.818020,15.504000,19.920000,23.474000,26.261000,30.329000,...,2,51.773082,3.435468,-0.850889,-1.431243,-1.911856,-2.405821,-3.006968,-3.449835,-4.556360
2863,C=C=C1C=[C]C1,C=C=C1C=[C]C1,C=C=C1C=CC1,91.366874,50.394958,22.416919,29.489266,35.000461,39.185145,45.162263,...,1,66.366211,25.815240,0.258337,-1.155381,-2.024215,-2.373362,-2.314548,-2.090808,-2.168527
2864,[CH2]C1=CC#CC1,[CH2]C1=CC#CC1,CC1=CC#CC1,131.227000,72.956831,23.409000,30.017000,34.619000,39.828000,46.529000,...,3,32.271344,0.823844,-0.802362,-1.521882,-1.227900,-2.500041,-3.127836,-3.711522,-2.960733
2865,[CH2]C1=CC#CC1,C=C1[CH]C#CC1,C=C1CC#CC1,136.767000,72.435147,22.819000,29.017000,34.189000,38.448000,45.419000,...,3,26.731344,0.773844,-0.212362,-0.521882,-0.797900,-1.120041,-2.017836,-2.931522,-2.570733


In [20]:
# Seed the descriptor table with the resonance-structure SMILES,
# stored under the column name "smiles".
radical_resonance_df = pd.DataFrame(
    {"smiles": HBI_correction_df["radical_resonance_smiles"]}
)
radical_resonance_df

Unnamed: 0,smiles
0,[O]C(=O)OC(O)(O)O
1,[O]C(O)(O)OC(=O)O
2,O=C(O)O[C](O)O
3,CC(=O)OC(OO)C(=O)C(O)O[O]
4,O=[C]OC(O)(O)O
...,...
2862,[C]1=CC=C1
2863,C=C=C1C=[C]C1
2864,[CH2]C1=CC#CC1
2865,C=C1[CH]C#CC1


In [70]:
def get_descriptors(smi):
    """
    Compute simple structural descriptors for a radical given as SMILES.

    Args:
        smi (str): SMILES string, parsed with ``RDKitMol.FromSmiles``.

    Returns:
        tuple: A 6-tuple, in this order:
            - set of the element symbols from ``mol.GetElementSymbols()``
            - result of ``ExactMolWt``
            - result of ``CalcNumRings``
            - result of ``CalcNumHeavyAtoms``
            - result of ``CalcNumRotatableBonds``
            - element symbol of the first atom (in ``mol.GetAtoms()`` order)
              that carries radical electrons

    Raises:
        IndexError: If no atom in the molecule carries radical electrons.
            The exception type matches the previous ``[...][0]`` lookup, but
            the message now names the offending SMILES so that failures inside
            parallel workers can be traced back to their input.
    """
    mol = RDKitMol.FromSmiles(smi)

    # Stop at the first radical center rather than building the full list.
    radical_atom = next(
        (atom for atom in mol.GetAtoms() if atom.GetNumRadicalElectrons() != 0),
        None,
    )
    if radical_atom is None:
        raise IndexError(f"No atom with radical electrons found in SMILES {smi!r}")

    # The RDKit descriptor functions are called on the wrapped molecule object.
    rdkit_mol = mol._mol
    return (
        set(mol.GetElementSymbols()),
        ExactMolWt(rdkit_mol),
        CalcNumRings(rdkit_mol),
        CalcNumHeavyAtoms(rdkit_mol),
        CalcNumRotatableBonds(rdkit_mol),
        radical_atom.GetSymbol(),
    )




In [71]:
# Compute descriptors for every SMILES using 4 worker processes; the tqdm bar
# advances as SMILES are consumed from the input column.
descriptor_tasks = (
    delayed(get_descriptors)(smi) for smi in tqdm(radical_resonance_df["smiles"])
)
descriptorss = Parallel(n_jobs=4, backend="multiprocessing")(descriptor_tasks)




100%|██████████| 2867/2867 [00:03<00:00, 716.97it/s]


In [72]:
# Entry 0 of each descriptor tuple is that molecule's set of element symbols.
atom_symbolss = [record[0] for record in descriptorss]

In [73]:
# Union of the element symbols found across all molecules.
all_atom_symbols = set().union(*atom_symbolss)
all_atom_symbols

{'C', 'H', 'N', 'O'}

In [74]:
PeriodicTable = GetPeriodicTable()
# Order the element symbols by increasing atomic number.
sorted_all_atom_symbols = sorted(
    all_atom_symbols,
    key=lambda symbol: PeriodicTable.GetAtomicNumber(symbol),
)
sorted_all_atom_symbols

['H', 'C', 'N', 'O']

In [75]:
# Add one boolean column per element, flagging the molecules that contain it.
for element in sorted_all_atom_symbols:
    contains_element = [element in symbols for symbols in atom_symbolss]
    radical_resonance_df[f"has_{element}"] = contains_element
radical_resonance_df

Unnamed: 0,smiles,has_H,has_C,has_N,has_O,Mw (g/mol),num_rings,num_heavy_atoms,num_rotatable_bonds
0,[O]C(=O)OC(O)(O)O,True,True,False,True,122.992963,0,8,4
1,[O]C(O)(O)OC(=O)O,True,True,False,True,122.992963,0,8,3
2,O=C(O)O[C](O)O,True,True,False,True,106.998048,0,7,3
3,CC(=O)OC(OO)C(=O)C(O)O[O],True,True,False,True,195.014092,0,13,8
4,O=[C]OC(O)(O)O,True,True,False,True,106.998048,0,7,5
...,...,...,...,...,...,...,...,...,...
2862,[C]1=CC=C1,True,True,False,False,51.023475,1,4,0
2863,C=C=C1C=[C]C1,True,True,False,False,77.039125,1,6,0
2864,[CH2]C1=CC#CC1,True,True,False,False,77.039125,1,6,1
2865,C=C1[CH]C#CC1,True,True,False,False,77.039125,1,6,0


In [76]:
# Entry 1 of each descriptor tuple: the ExactMolWt value.
radical_resonance_df["Mw (g/mol)"] = [record[1] for record in descriptorss]

In [77]:
# Entry 2 of each descriptor tuple: the ring count.
radical_resonance_df["num_rings"] = [record[2] for record in descriptorss]

In [78]:
# Entry 3 of each descriptor tuple: the heavy-atom count.
radical_resonance_df["num_heavy_atoms"] = [record[3] for record in descriptorss]

In [30]:
# Entry 4 of each descriptor tuple: the rotatable-bond count.
radical_resonance_df["num_rotatable_bonds"] = [record[4] for record in descriptorss]

In [79]:
# Entry 5 of each descriptor tuple: element symbol of the radical atom.
radical_resonance_df["radical_atom_type"] = [record[5] for record in descriptorss]

In [80]:
# Display the descriptor table.
radical_resonance_df

Unnamed: 0,smiles,has_H,has_C,has_N,has_O,Mw (g/mol),num_rings,num_heavy_atoms,num_rotatable_bonds,radical_atom_type
0,[O]C(=O)OC(O)(O)O,True,True,False,True,122.992963,0,8,4,O
1,[O]C(O)(O)OC(=O)O,True,True,False,True,122.992963,0,8,3,O
2,O=C(O)O[C](O)O,True,True,False,True,106.998048,0,7,3,C
3,CC(=O)OC(OO)C(=O)C(O)O[O],True,True,False,True,195.014092,0,13,8,O
4,O=[C]OC(O)(O)O,True,True,False,True,106.998048,0,7,5,C
...,...,...,...,...,...,...,...,...,...,...
2862,[C]1=CC=C1,True,True,False,False,51.023475,1,4,0,C
2863,C=C=C1C=[C]C1,True,True,False,False,77.039125,1,6,0,C
2864,[CH2]C1=CC#CC1,True,True,False,False,77.039125,1,6,1,C
2865,C=C1[CH]C#CC1,True,True,False,False,77.039125,1,6,0,C


In [81]:
# Write the descriptor table to CSV; index=False leaves the row index out of the file.
radical_resonance_df.to_csv(path_or_buf="../data/radical_resonance.csv", index=False)