## Data Loader File
This file loads data from .csv files to be used in the main Reaction_Rate_Prediction file

## Import needed libraries

In [1]:
import numpy as np
import pandas as pd

## Load data from Chemosphere
Chao Li, Xianhai Yang, Xuehua Li, Jingwen Chen, Xianliang Qiao,
Development of a model for predicting hydroxyl radical reaction rate constants of organic chemicals at different temperatures,
Chemosphere,
Volume 95,
2014,
Pages 613-618,
ISSN 0045-6535,
https://doi.org/10.1016/j.chemosphere.2013.10.020.

In [2]:
# Read the Chemosphere (Li et al., 2014) table from the local Data folder.
Chemosphere = pd.read_csv('./Data/ohtempdata.csv')
# NOTE(review): `shuffle` is not used in any visible cell of this notebook —
# confirm whether a consumer of this loader relies on it before moving/removing.
from sklearn.utils import shuffle
# Bare expression: display the loaded frame as the cell output.
Chemosphere

Unnamed: 0.1,Unnamed: 0,ID,CAS,Compounds,T,logK,smiles,Tinv
0,1,1,000002-52-3,n-propylpentafluorobenzene,298.0,-11.51,CCCc1c(F)c(F)c(F)c(F)c1F,0.003356
1,2,2,000002-60-0,3-octyl nitrate,298.0,-11.41,CCCCCC(CC)O[N+]([O-])=O,0.003356
2,3,3,000003-82-0,di-iso-propoxymethane,298.0,-10.46,CC(C)OCOC(C)C,0.003356
3,4,4,000050-00-0,formaldehyde,298.0,-10.93,C=O,0.003356
4,5,5,000056-23-5,carbon tetrachloride,298.0,-15.92,ClC(Cl)(Cl)Cl,0.003356
...,...,...,...,...,...,...,...,...
1467,1529,1529,138689-24-4,chlorofluoroacetaldehyde,298.0,-11.68,FC(Cl)C=O,0.003356
1468,1531,1531,144109-03-5,"2,2,3,4,4-Pentafluoro oxetane",298.0,-14.61,FC1C(F)(F)OC1(F)F,0.003356
1469,1541,1541,RC002-52-5,2-Methyl-3-butyl nitrate,298.0,-11.74,CC(C)C(C)O[N+]([O-])=O,0.003356
1470,1542,1542,RC002-52-6,2-Methyl-2-pentyl nitrate,298.0,-11.76,CCCC(C)(C)O[N+]([O-])=O,0.003356


## Load data for Alkanes assembled by Myriam

In [3]:
# Read the alkane table, discard every row that contains a missing value and
# rename the columns to the names shared with the other data sets.
alkanes = (
    pd.read_csv('./Data/Alkanes.csv')
    .dropna()
    .rename(columns={'log10(k)': 'logK', 'SMILES': 'smiles', 'Name': 'Compounds'})
)
alkanes

Unnamed: 0,CAS,Compounds,Formula,smiles,T,logK
0,000074-84-0,ethane,C2H6,CC,138.0,-13.89
1,000074-84-0,ethane,C2H6,CC,178.0,-13.60
2,000074-84-0,ethane,C2H6,CC,180.0,-13.57
3,000074-84-0,ethane,C2H6,CC,190.0,-13.42
4,000074-84-0,ethane,C2H6,CC,200.0,-13.34
...,...,...,...,...,...,...
725,000629-50-5,n-tridecane,C13H28,CCCCCCCCCCCCC,300.0,-10.81
726,000629-50-5,n-tridecane,C13H28,CCCCCCCCCCCCC,312.0,-10.76
727,000629-59-4,n-tetradecane,C14H30,CCCCCCCCCCCCCC,312.0,-10.71
728,000629-62-9,n-pentadecane,C15H32,CCCCCCCCCCCCCCC,312.0,-10.65


## Load data from McGillen et al.
McGillen, M. R., Carter, W. P. L., Mellouki, A., Orlando, J. J., Picquet-Varrault, B., and Wallington, T. J.: Database for the kinetics of the gas-phase atmospheric reactions of organic compounds, Earth Syst. Sci. Data, 12, 1203–1216, https://doi.org/10.5194/essd-12-1203-2020, 2020.

In [4]:
# Read the McGillen table and keep only rows that carry at least one of the
# four 298 K rate constants.
mcgillen = pd.read_csv('./Data/Mcgillan.csv').dropna(
    subset=['k(298)', 'o3_k(298)', 'no3_k(298)', 'cl_k(298)'],
    how='all',
)
# Add the base-10 logarithm of each rate constant, a constant temperature
# column, and rename the identifier columns to the shared names.
mcgillen = mcgillen.assign(
    logK=np.log10(mcgillen['k(298)']),
    o3_logK=np.log10(mcgillen['o3_k(298)']),
    no3_logK=np.log10(mcgillen['no3_k(298)']),
    cl_logK=np.log10(mcgillen['cl_k(298)']),
    T=298,
).rename(columns={'Smiles': 'smiles', 'Primary name': 'Compounds'})
mcgillen

Unnamed: 0,Compounds,smiles,InChi,InChiKey,Mwt,Atoms,Alternative Structure ID,Type,k(298),Unc.,...,cl_k(298),cl_Unc.,cl_A,cl_B,cl_n,logK,o3_logK,no3_logK,cl_logK,T
0,methane,C,InChI=1S/CH4/h1H4,VNWKTOKETHGBQD-UHFFFAOYSA-N,16.042460,CH4,,1,6.680000e-15,0.150000,...,1.000000e-13,15%,6.600000e-12,1240.0,,-14.175224,,,-13.000000,298
1,formaldehyde,C=O,InChI=1S/CH2O/c1-2/h1H2,WSFSSNUMVMOOMR-UHFFFAOYSA-N,30.025980,CH2O,,18,8.500000e-12,0.202264,...,7.200000e-11,15%,8.100000e-11,34.0,,-11.070581,,-15.259637,-10.142668,298
2,methanol,CO,"InChI=1S/CH4O/c1-2/h2H,1H3",OKKJLVBELUTLKV-UHFFFAOYSA-N,32.041860,CH4O,CH3-OH,16,8.780000e-13,0.100000,...,5.100000e-11,20%,5.100000e-11,0.0,,-12.056505,,-15.886057,-10.292430,298
3,fluoromethane,CF,InChI=1S/CH3F/c1-2/h1H3,NBVXSUQYWXRMNV-UHFFFAOYSA-N,34.032923,CH3F,CH3-F,60,2.100000e-14,0.150000,...,3.600000e-13,41%,4.900000e-12,781.0,,-13.677781,,,-12.443697,298
4,formic acid,OC=O,"InChI=1S/CH2O2/c2-1-3/h1H,(H,2,3)",BDAGIHXWWSANSR-UHFFFAOYSA-N,46.025380,CH2O2,HCO-OH,21,4.500000e-13,0.412538,...,1.900000e-13,41%,,,,-12.346787,,,-12.721246,298
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1660,pyrazole,c1ccn[nH]1,"InChI=1S/C3H4N2/c1-2-4-5-3-1/h1-3H,(H,4,5)",WTKZEGDFNFYCGP-UHFFFAOYSA-N,69.090000,C3H5N2,CH@=CH-aCH-NH-NH@,79,2.840000e-11,,...,,,,,,-10.546682,,,,298
1661,"2,2-dichloroethanol",OCC(Cl)Cl,"InChI=1S/C2H4Cl2O/c3-2(4)1-5/h2,5H,1H2",IDJOCJAIQSKSOP-UHFFFAOYSA-N,114.960000,C2H4OCl2,CHCl2-CH2-OH,67,5.300000e-13,0.150000,...,5.980000e-12,15%,,,,-12.275724,,,-11.223299,298
1662,5-methyl-2-hexanol,CC(CCC(O)C)C,"InChI=1S/C7H16O/c1-6(2)4-5-7(3)8/h6-8H,4-5H2,1...",ZDVJGWXFXGJSIU-UHFFFAOYSA-N,116.200000,C7H16O,CH3-CH(CH3)-CH2-CH2-CH(CH3)-OH,16,,,...,2.480000e-10,15%,,,,,,,-9.605548,298
1663,"2,2-dimethyl-3-hexanol",CCCC(C(C)(C)C)O,"InChI=1S/C8H18O/c1-5-6-7(9)8(2,3)4/h7,9H,5-6H2...",PFHLGQKVKALLMD-UHFFFAOYSA-N,130.230000,C8H18O,CH3-CH2-CH2-CH(OH)-C(CH3)(CH3)-CH3,16,,,...,2.550000e-10,15%,,,,,,,-9.593460,298


## Append Arrhenius parameters
All Arrhenius parameters come from the McGillen database and use the form $k(T) = A \left(\frac{T}{300}\right)^n \exp\left(-\frac{B}{T}\right)$.

In [5]:
def append_Arrhenius_Params(df,mcgillen):
    """Add the columns 'A', 'B' and 'n' to ``df`` by SMILES lookup in ``mcgillen``.

    For every row of ``df`` the first row of ``mcgillen`` with the same
    ``smiles`` value supplies the three parameters; rows without a match
    (or without a SMILES) receive NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``smiles`` column. It is modified in place; any index
        is accepted because rows are matched by position, not by label.
    mcgillen : pandas.DataFrame
        Frame with the columns ``smiles``, ``A``, ``B`` and ``n``.

    Returns
    -------
    None
        The result is written into ``df``.
    """
    # One row per SMILES, keeping the first occurrence. Rows with a missing
    # SMILES are removed so that a NaN key can never be "matched".
    lookup = (
        mcgillen.dropna(subset=['smiles'])
        .drop_duplicates(subset='smiles', keep='first')
        .set_index('smiles')
    )
    for param in ('A', 'B', 'n'):
        matched = df['smiles'].map(lookup[param])
        # Assign as a plain float array so no index alignment is involved.
        df[param] = matched.astype(float).to_numpy()
# Attach the McGillen Arrhenius parameters to the Chemosphere and alkane tables.
for target_frame in (Chemosphere, alkanes):
    append_Arrhenius_Params(target_frame, mcgillen)

## Fuse data together into one dataframe

In [6]:
# Stack the three sources row-wise (alkanes, then McGillen, then Chemosphere).
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df = pd.concat([alkanes, mcgillen, Chemosphere])
# Renumber the rows; the previous per-source row labels are kept in an
# 'index' column.
df = df.reset_index()

# The empty prefix addresses the unprefixed A/n columns; the others address
# the o3_/no3_/cl_ parameter sets.
for prefix in ('', 'o3_', 'no3_', 'cl_'):
    a_col = prefix + 'A'
    n_col = prefix + 'n'
    # Where A is known but n is missing, set n to 0 (the (T/300)^n factor
    # then equals 1). A single .loc call writes into df itself; the former
    # chained `df.n.loc[...] = 0` raised SettingWithCopyWarning.
    missing_exponent = df[a_col].notna() & df[n_col].isna()
    df.loc[missing_exponent, n_col] = 0
    # Store the pre-exponential factor as log10(A).
    df[a_col] = np.log10(df[a_col])
df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


Unnamed: 0.1,index,CAS,Compounds,Formula,smiles,T,logK,A,B,n,...,cl_Unc.,cl_A,cl_B,cl_n,o3_logK,no3_logK,cl_logK,Unnamed: 0,ID,Tinv
0,0,000074-84-0,ethane,C2H6,CC,138.0,-13.89,-11.821023,533.0,1.92,...,,,,,,,,,,
1,1,000074-84-0,ethane,C2H6,CC,178.0,-13.60,-11.821023,533.0,1.92,...,,,,,,,,,,
2,2,000074-84-0,ethane,C2H6,CC,180.0,-13.57,-11.821023,533.0,1.92,...,,,,,,,,,,
3,3,000074-84-0,ethane,C2H6,CC,190.0,-13.42,-11.821023,533.0,1.92,...,,,,,,,,,,
4,4,000074-84-0,ethane,C2H6,CC,200.0,-13.34,-11.821023,533.0,1.92,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3784,1467,138689-24-4,chlorofluoroacetaldehyde,,FC(Cl)C=O,298.0,-11.68,,,,...,,,,,,,,1529.0,1529.0,0.003356
3785,1468,144109-03-5,"2,2,3,4,4-Pentafluoro oxetane",,FC1C(F)(F)OC1(F)F,298.0,-14.61,,,,...,,,,,,,,1531.0,1531.0,0.003356
3786,1469,RC002-52-5,2-Methyl-3-butyl nitrate,,CC(C)C(C)O[N+]([O-])=O,298.0,-11.74,,,,...,,,,,,,,1541.0,1541.0,0.003356
3787,1470,RC002-52-6,2-Methyl-2-pentyl nitrate,,CCCC(C)(C)O[N+]([O-])=O,298.0,-11.76,,,,...,,,,,,,,1542.0,1542.0,0.003356


In [34]:
# Load the AtmVOC table from the local Data folder and display it.
AtmVOC = pd.read_csv('./Data/AtmVOC.csv')
AtmVOC

Unnamed: 0,C.no,Primary name,Smiles,InChi,InChiKey,Mwt,Atoms,Alternative Structure ID,Type,SepDBs,...,no3_Loc,cl_Type,cl_k(298),cl_Unc.,cl_A,cl_B,cl_n,cl_Low,cl_High,cl_Loc
0,1,methane,C,InChI=1S/CH4/h1H4,VNWKTOKETHGBQD-UHFFFAOYSA-N,16.042460,CH4,,1,,...,,2,1.000000e-13,0.148154,6.600000e-12,1240.0,,200.0,300.0,3851.0
1,2,formaldehyde,C=O,InChI=1S/CH2O/c1-2/h1H2,WSFSSNUMVMOOMR-UHFFFAOYSA-N,30.025980,CH2O,,18,,...,2672.0,2,7.200000e-11,0.148154,8.100000e-11,34.0,,200.0,500.0,3854.0
2,3,methanol,CO,"InChI=1S/CH4O/c1-2/h2H,1H3",OKKJLVBELUTLKV-UHFFFAOYSA-N,32.041860,CH4O,CH3-OH,16,,...,2675.0,2,5.100000e-11,0.200000,5.100000e-11,0.0,,225.0,950.0,3857.0
3,4,fluoromethane,CF,InChI=1S/CH3F/c1-2/h1H3,NBVXSUQYWXRMNV-UHFFFAOYSA-N,34.032923,CH3F,CH3-F,60,,...,,2,3.600000e-13,0.412538,4.900000e-12,781.0,,200.0,300.0,3861.0
4,5,formic acid,OC=O,"InChI=1S/CH2O2/c2-1-3/h1H,(H,2,3)",BDAGIHXWWSANSR-UHFFFAOYSA-N,46.025380,CH2O2,HCO-OH,21,,...,,1,1.900000e-13,0.412538,,,,,,3864.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1662,1692,5-methyl-2-hexanol,CC(CCC(O)C)C,"InChI=1S/C7H16O/c1-6(2)4-5-7(3)8/h6-8H,4-5H2,1...",ZDVJGWXFXGJSIU-UHFFFAOYSA-N,116.200000,C7H16O,CH3-CH(CH3)-CH2-CH2-CH(CH3)-OH,16,,...,,1,2.480000e-10,0.150000,,,,,,5048.0
1663,1693,"2,2-dimethyl-3-hexanol",CCCC(C(C)(C)C)O,"InChI=1S/C8H18O/c1-5-6-7(9)8(2,3)4/h7,9H,5-6H2...",PFHLGQKVKALLMD-UHFFFAOYSA-N,130.230000,C8H18O,CH3-CH2-CH2-CH(OH)-C(CH3)(CH3)-CH3,16,,...,,1,2.550000e-10,0.150000,,,,,,5051.0
1664,1694,"2,4,4-trimethyl-1-pentanol",OCC(CC(C)(C)C)C,"InChI=1S/C8H18O/c1-7(6-9)5-8(2,3)4/h7,9H,5-6H2...",ZNRVRWHPZZOTIE-UHFFFAOYSA-N,130.230000,C8H18O,CH3-CH(CH2-OH)-CH2-C(CH3)(CH3)-CH3,16,,...,,1,2.350000e-10,0.150000,,,,,,5054.0
1665,1695,"2-methyl-1,3-dioxolane",CC1OCCO1,"InChI=1S/C4H8O2/c1-4-5-2-3-6-4/h4H,2-3H2,1H3",HTWIZMNMTWYQRN-UHFFFAOYSA-N,88.105100,C4H8O2,CH3-CH@-O-CH2-CH2-O@,17,,...,,,,,,,,,,


In [35]:
# Load the AtmVOC_KT table from the local Data folder.
AtmVOC_KT = pd.read_csv('./Data/AtmVOC_KT.csv')
# Replace Unc by Unc / k(T), i.e. express it as a fraction of the rate constant.
AtmVOC_KT['Unc'] = AtmVOC_KT['Unc'] / AtmVOC_KT['k(T)']

In [36]:
# smiles=AtmVOC_KT['Rxn'].copy()
# names=AtmVOC_KT['Rxn'].copy()
# # smiles=AtmVOC_KT['Rxn'].copy()
# # smiles=AtmVOC_KT['Rxn'].copy()
# for i in range(AtmVOC_KT.shape[0]):
#     smiles[i]=AtmVOC['Smiles'][np.where(AtmVOC['No.']==AtmVOC_KT['C.no'][i])[0][0]]
#     names[i]=AtmVOC['Primary name'][np.where(AtmVOC['No.']==AtmVOC_KT['C.no'][i])[0][0]]
# AtmVOC_KT['smiles'] = smiles
# AtmVOC_KT['Compounds'] = names

# AtmVOC_KT

In [37]:
def append_vec(df,AtmVOC,col):
    """Copy column ``col`` from ``AtmVOC`` into ``df``, matching rows on 'C.no'.

    Each row of ``df`` receives the value of ``col`` from the first row of
    ``AtmVOC`` with the same 'C.no'. Rows without a match receive NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'C.no' column. It is modified in place; any index is
        accepted because rows are matched by position, not by label.
    AtmVOC : pandas.DataFrame
        Lookup frame containing 'C.no' and ``col``.
    col : str
        Name of the column to copy. May be 'C.no' itself, in which case
        unmatched rows of ``df`` end up with NaN in 'C.no'.

    Returns
    -------
    None
        The result is written into ``df[col]``.
    """
    # A column counts as text when its first non-missing value is a str;
    # everything else (including an all-missing column) is stored as float.
    valid = AtmVOC[col].dropna()
    is_text = (not valid.empty) and isinstance(valid.iloc[0], str)

    # First row per 'C.no'. drop=False keeps 'C.no' selectable as ``col``.
    lookup = (
        AtmVOC.dropna(subset=['C.no'])
        .drop_duplicates(subset='C.no', keep='first')
        .set_index('C.no', drop=False)[col]
    )
    matched = df['C.no'].map(lookup)
    # Assign as a plain array of df's own length so that unmatched rows are
    # NaN and no index alignment against AtmVOC can occur.
    df[col] = matched.astype(object if is_text else float).to_numpy()
# Copy every AtmVOC column onto the AtmVOC_KT rows, one column at a time.
for column_name in AtmVOC.columns:
    append_vec(AtmVOC_KT, AtmVOC, column_name)
AtmVOC_KT

Unnamed: 0,Rxn,C.no,RefID,T,k(T),Unc,Primary name,Smiles,InChi,InChiKey,...,no3_Loc,cl_Type,cl_k(298),cl_Unc.,cl_A,cl_B,cl_n,cl_Low,cl_High,cl_Loc
0,OH,3.0,kt003,240.0,6.570000e-13,0.200000,methanol,CO,"InChI=1S/CH4O/c1-2/h2H,1H3",OKKJLVBELUTLKV-UHFFFAOYSA-N,...,2675.0,2,5.100000e-11,0.2,5.100000e-11,0.0,,225.0,950.0,3857.0
1,OH,3.0,kt003,296.0,8.610000e-13,0.200000,methanol,CO,"InChI=1S/CH4O/c1-2/h2H,1H3",OKKJLVBELUTLKV-UHFFFAOYSA-N,...,2675.0,2,5.100000e-11,0.2,5.100000e-11,0.0,,225.0,950.0,3857.0
2,OH,3.0,kt003,350.0,1.250000e-12,0.200000,methanol,CO,"InChI=1S/CH4O/c1-2/h2H,1H3",OKKJLVBELUTLKV-UHFFFAOYSA-N,...,2675.0,2,5.100000e-11,0.2,5.100000e-11,0.0,,225.0,950.0,3857.0
3,OH,3.0,kt003,400.0,1.410000e-12,0.200000,methanol,CO,"InChI=1S/CH4O/c1-2/h2H,1H3",OKKJLVBELUTLKV-UHFFFAOYSA-N,...,2675.0,2,5.100000e-11,0.2,5.100000e-11,0.0,,225.0,950.0,3857.0
4,OH,3.0,kt003,440.0,1.620000e-12,0.200000,methanol,CO,"InChI=1S/CH4O/c1-2/h2H,1H3",OKKJLVBELUTLKV-UHFFFAOYSA-N,...,2675.0,2,5.100000e-11,0.2,5.100000e-11,0.0,,225.0,950.0,3857.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2891,Cl,8.0,up180,843.0,7.560000e-12,0.046296,chloromethane,CCl,InChI=1S/CH3Cl/c1-2/h1H3,NEHMKBQYUWJMIP-UHFFFAOYSA-N,...,,2,4.890000e-13,0.1,4.320000e-12,646.4,1.3057,222.0,843.0,3871.0
2892,Cl,8.0,up126,296.0,4.800000e-13,0.150000,chloromethane,CCl,InChI=1S/CH3Cl/c1-2/h1H3,NEHMKBQYUWJMIP-UHFFFAOYSA-N,...,,2,4.890000e-13,0.1,4.320000e-12,646.4,1.3057,222.0,843.0,3871.0
2893,Cl,8.0,up126,296.0,5.000000e-13,0.150000,chloromethane,CCl,InChI=1S/CH3Cl/c1-2/h1H3,NEHMKBQYUWJMIP-UHFFFAOYSA-N,...,,2,4.890000e-13,0.1,4.320000e-12,646.4,1.3057,222.0,843.0,3871.0
2894,Cl,8.0,up126,296.0,4.800000e-13,0.150000,chloromethane,CCl,InChI=1S/CH3Cl/c1-2/h1H3,NEHMKBQYUWJMIP-UHFFFAOYSA-N,...,,2,4.890000e-13,0.1,4.320000e-12,646.4,1.3057,222.0,843.0,3871.0


In [38]:
# log10 of every measured k(T), computed once and then masked per reaction.
log_k = np.log10(AtmVOC_KT['k(T)'].to_numpy())
# oh_logk is blanked on rows whose Rxn is 'Cl'; cl_logk is blanked on rows
# whose Rxn is 'OH'. np.nan is used because the np.NaN alias was removed in
# NumPy 2.0.
oh_logk = np.where(AtmVOC_KT['Rxn'] == 'Cl', np.nan, log_k)
cl_logk = np.where(AtmVOC_KT['Rxn'] == 'OH', np.nan, log_k)
AtmVOC_KT['oh_logK'] = oh_logk
AtmVOC_KT['cl_logK'] = cl_logk
AtmVOC_KT

Unnamed: 0,Rxn,C.no,RefID,T,k(T),Unc,Primary name,Smiles,InChi,InChiKey,...,cl_k(298),cl_Unc.,cl_A,cl_B,cl_n,cl_Low,cl_High,cl_Loc,oh_logk,cl_logk
0,OH,3.0,kt003,240.0,6.570000e-13,0.200000,methanol,CO,"InChI=1S/CH4O/c1-2/h2H,1H3",OKKJLVBELUTLKV-UHFFFAOYSA-N,...,5.100000e-11,0.2,5.100000e-11,0.0,,225.0,950.0,3857.0,-12.182435,
1,OH,3.0,kt003,296.0,8.610000e-13,0.200000,methanol,CO,"InChI=1S/CH4O/c1-2/h2H,1H3",OKKJLVBELUTLKV-UHFFFAOYSA-N,...,5.100000e-11,0.2,5.100000e-11,0.0,,225.0,950.0,3857.0,-12.064997,
2,OH,3.0,kt003,350.0,1.250000e-12,0.200000,methanol,CO,"InChI=1S/CH4O/c1-2/h2H,1H3",OKKJLVBELUTLKV-UHFFFAOYSA-N,...,5.100000e-11,0.2,5.100000e-11,0.0,,225.0,950.0,3857.0,-11.903090,
3,OH,3.0,kt003,400.0,1.410000e-12,0.200000,methanol,CO,"InChI=1S/CH4O/c1-2/h2H,1H3",OKKJLVBELUTLKV-UHFFFAOYSA-N,...,5.100000e-11,0.2,5.100000e-11,0.0,,225.0,950.0,3857.0,-11.850781,
4,OH,3.0,kt003,440.0,1.620000e-12,0.200000,methanol,CO,"InChI=1S/CH4O/c1-2/h2H,1H3",OKKJLVBELUTLKV-UHFFFAOYSA-N,...,5.100000e-11,0.2,5.100000e-11,0.0,,225.0,950.0,3857.0,-11.790485,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2891,Cl,8.0,up180,843.0,7.560000e-12,0.046296,chloromethane,CCl,InChI=1S/CH3Cl/c1-2/h1H3,NEHMKBQYUWJMIP-UHFFFAOYSA-N,...,4.890000e-13,0.1,4.320000e-12,646.4,1.3057,222.0,843.0,3871.0,,-11.121478
2892,Cl,8.0,up126,296.0,4.800000e-13,0.150000,chloromethane,CCl,InChI=1S/CH3Cl/c1-2/h1H3,NEHMKBQYUWJMIP-UHFFFAOYSA-N,...,4.890000e-13,0.1,4.320000e-12,646.4,1.3057,222.0,843.0,3871.0,,-12.318759
2893,Cl,8.0,up126,296.0,5.000000e-13,0.150000,chloromethane,CCl,InChI=1S/CH3Cl/c1-2/h1H3,NEHMKBQYUWJMIP-UHFFFAOYSA-N,...,4.890000e-13,0.1,4.320000e-12,646.4,1.3057,222.0,843.0,3871.0,,-12.301030
2894,Cl,8.0,up126,296.0,4.800000e-13,0.150000,chloromethane,CCl,InChI=1S/CH3Cl/c1-2/h1H3,NEHMKBQYUWJMIP-UHFFFAOYSA-N,...,4.890000e-13,0.1,4.320000e-12,646.4,1.3057,222.0,843.0,3871.0,,-12.318759


In [1]:
# Oxidant prefixes used by the AtmVOC column names (e.g. 'oh_k(298)', 'cl_A').
# Add log10 of each 298 K rate constant to AtmVOC and tag its rows with T = 298.
for oxidant in ('oh', 'o3', 'no3', 'cl'):
    AtmVOC[oxidant + '_logK'] = np.log10(AtmVOC[oxidant + '_k(298)'])
AtmVOC['T'] = 298

# Stack the 298 K table on top of the per-temperature table.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df = pd.concat([AtmVOC, AtmVOC_KT])
# Keep only rows carrying at least one of the four 298 K rate constants.
df = df.dropna(subset=['oh_k(298)','o3_k(298)','no3_k(298)','cl_k(298)'],how='all')
df = df.rename(columns={'Smiles': 'smiles', 'Primary name': 'Compounds'})
# Renumber the rows 0..n-1.
df = df.reset_index(drop=True)

for oxidant in ('oh', 'o3', 'no3', 'cl'):
    a_col = oxidant + '_A'
    n_col = oxidant + '_n'
    # Where A is known but n is missing, set n to 0. A single .loc call
    # writes into df itself instead of the former chained `df.x.loc[...] = 0`.
    missing_exponent = df[a_col].notna() & df[n_col].isna()
    df.loc[missing_exponent, n_col] = 0
    # Store the pre-exponential factor as log10(A).
    df[a_col] = np.log10(df[a_col])
df

NameError: name 'np' is not defined

## Dummy function to return dataframe

In [None]:
def make_df(df=df):
    """Return ``df`` unchanged.

    The default value is the module-level ``df`` as it exists at the moment
    this definition is executed, so ``make_df()`` hands back that frame.
    """
    return df