In [1]:
import sys
sys.path.append('..')
import chemex as cx
import chemex.web
from itertools import islice
import pandas as pd
from pandas import DataFrame

In [2]:
# Paste ChemSpider IDs from a spreadsheet into this string, one ID per line:
csids_col = '''
23140
8677
5989
74268
13835224
733
199342
28950
8060
13850135
13849989
1267362
20939
13849981
6313
592
3
23237
13848467
4938996
36393
16567
''' # end CSIDs
# Split on any run of whitespace instead of on '\n' alone, so blank lines,
# trailing spaces and Windows '\r\n' line endings in the pasted text do not
# turn into empty or malformed IDs.
csids = csids_col.split()
# Hand the IDs and chemex's default property list to cs_properties_gen, then
# build a table from its records, indexed by their 'CSID' field.
multi_data = cx.web.cs_properties_gen(csids, cx.web.cs_default_props)
data_df = DataFrame(multi_data).set_index('CSID')

In [3]:
# Display the assembled property table (indexed by CSID) for inspection.
data_df

Unnamed: 0_level_0,EPI Suite,Experimental Boiling Point,Experimental LogP,Experimental Melting Point,Experimental Solubility
CSID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
23140,\n\nPredicted data is generated using the US E...,,,,
8677,,,,"[206 °C Alfa Aesar, 204-207 °C Oxford Universi...",
5989,,,,[300 °C Alfa Aesar 45556],[Soluble to 3000 mM in water Tocris Bioscience...
74268,\n\nPredicted data is generated using the US E...,,,,
13835224,\n\nPredicted data is generated using the US E...,"[186-188 °C Alfa Aesar, 188 °C Food and Agricu...",[-1.341 Vitas-M STL146584],"[-60 °C Alfa Aesar, -60 °C Oxford University C...","[Miscible with water, acetone, chloroform. So..."
733,\n\nPredicted data is generated using the US E...,[182 deg C / 20 mm (335.1395 °C / 760 mmHg)\r\...,,"[18 °C Alfa Aesar, 17.8 °C Oxford University C...","[Miscible NIOSH MA8050000, Soluble to 1000 mM ..."
199342,\n\nPredicted data is generated using the US E...,,[4.072 Vitas-M STK709232],,
28950,\n\nPredicted data is generated using the US E...,,,,
8060,\n\nPredicted data is generated using the US E...,[190 deg C / 14 mm (360.4696 °C / 760 mmHg)\r\...,,"[18-20 °C Alfa Aesar, 19 °C Jean-Claude Bradle...",
13850135,\n\nPredicted data is generated using the US E...,"[221-222 °C Alfa Aesar, 225 °C Food and Agricu...",[3.382 Vitas-M STK085542],[77-83 °C (Literature) Indofine \r\n ...,


In [4]:
def series_to_text(s):
    """Render one row of property values as a plain-text block.

    Parameters
    ----------
    s : pandas.Series
        A row whose labels are property names. Only ``list`` and ``dict``
        values contribute to the output; every other value (for example NaN
        for a missing property) is skipped.

    Returns
    -------
    str
        For each list value: a ``"<label>:"`` header line followed by one
        two-space-indented line per item, keeping only the stripped text
        before the item's first newline. For each dict value: one
        ``"key: value"`` line per entry, without a header. Lines are joined
        with newlines; the result is ``''`` when no value qualifies.
    """
    lines = []
    # Walk label/value pairs directly rather than re-indexing with s[label],
    # which is ambiguous for integer labels and returns a sub-Series when a
    # label is duplicated.
    for label, value in s.items():
        if isinstance(value, list):
            # str() keeps this working for non-string labels.
            lines.append(str(label) + ':')
            lines.extend('  ' + item.split('\n')[0].strip() for item in value)
        elif isinstance(value, dict):
            lines.extend(': '.join([str(k), str(v)]) for k, v in value.items())
    return '\n'.join(lines)

def dict_to_text(d):
    """Format a mapping as newline-separated ``key: value`` lines.

    Keys and values are converted with ``str()``. An empty mapping gives
    an empty string.
    """
    rows = []
    for key, value in d.items():
        rows.append(str(key) + ': ' + str(value))
    return '\n'.join(rows)

In [5]:
# Run chemex's EPI Suite parser over each compound's raw 'EPI Suite' entry and
# keep the parsed results as a new column on the table.
data_df['EPI Suite Results'] = data_df['EPI Suite'].apply(
    cx.web.epi_suite_values
)
epi_res = data_df['EPI Suite Results']

# Flatten each parsed result into "key: value" lines for the spreadsheet.
epi_res_txt = epi_res.map(dict_to_text)
epi_res_txt

CSID
23140       Ready Biodegradability Prediction: YES\nLog BC...
8677                                                         
5989                                                         
74268       Ready Biodegradability Prediction: YES\nLog BC...
13835224    Ready Biodegradability Prediction: YES\nLog BC...
733         Ready Biodegradability Prediction: YES\nLog BC...
199342      Ready Biodegradability Prediction: NO\nLog BCF...
28950       Ready Biodegradability Prediction: YES\nLog BC...
8060        Ready Biodegradability Prediction: NO\nLog BCF...
13850135    Log Kow (KOWWIN v1.67 estimate): 3.56\nLog Kow...
13849989    Log Kow (KOWWIN v1.67 estimate): 3.47\nLog Kow...
1267362     Ready Biodegradability Prediction: YES\nLog BC...
20939       Log Kow (KOWWIN v1.67 estimate): 4.83\nLog Kow...
13849981    Log Kow (KOWWIN v1.67 estimate): 3.38\nLog Kow...
6313        Ready Biodegradability Prediction: YES\nLog BC...
592         Ready Biodegradability Prediction: YES\nLog BC...
3  

In [9]:
# Print the full multi-line EPI Suite text for the entry at position 3.
# The Series is indexed by CSID labels, so a bare `[3]` depended on pandas'
# positional fallback for integer keys (deprecated in recent pandas); `.iloc`
# selects by position explicitly.
print(epi_res_txt.iloc[3])

Ready Biodegradability Prediction: YES
Log BCF from regression-based method: 1.249 (BCF = 17.74)
Log Koa (experimental database): None
Henrys LC [VP/WSol estimate using EPI values]: 3.644E-007 atm-m3/mole
Log Kow (KOWWIN v1.67 estimate): 2.53
Log Koa (KOAWIN v1.10 estimate): 7.436
Level III Fugacity Model: 
           Mass Amount    Half-Life    Emissions
            (percent)        (hr)       (kg/hr)
   Air       0.864           8.8          1000       
   Water     25.7            208          1000       
   Soil      73.3            416          1000       
   Sediment  0.158           1.87e+003    0          
     Persistence Time: 323 hr


In [7]:
# Restrict the table to the experimental phys/chem property columns.
physchem_df = data_df[[
    'Experimental LogP',
    'Experimental Solubility',
    'Experimental Boiling Point',
    'Experimental Melting Point',
]]
# Collapse each compound's row into a single text block (one call per row).
physchem_txt = physchem_df.apply(series_to_text, axis='columns')
physchem_txt

CSID
23140                                                        
8677        Experimental Melting Point:\n  206 °C Alfa Aes...
5989        Experimental Solubility:\n  Soluble to 3000 mM...
74268                                                        
13835224    Experimental LogP:\n  -1.341 Vitas-M STL146584...
733         Experimental Solubility:\n  Miscible NIOSH MA8...
199342          Experimental LogP:\n  4.072 Vitas-M STK709232
28950                                                        
8060        Experimental Boiling Point:\n  190 deg C / 14 ...
13850135    Experimental LogP:\n  3.382 Vitas-M STK085542\...
13849989    Experimental Boiling Point:\n  230 °C Food and...
1267362     Experimental Boiling Point:\n  174-175 °C / 15...
20939       Experimental Solubility:\n  Insoluble in water...
13849981    Experimental Boiling Point:\n  199-200 °C Alfa...
6313        Experimental Boiling Point:\n  63-65 °C Alfa A...
592         Experimental Boiling Point:\n  122 °C / 15 mm ...
3  

In [8]:
# Print the full phys/chem text for the entry at position 6.
# The Series is indexed by CSID labels, so a bare `[6]` depended on pandas'
# positional fallback for integer keys (deprecated in recent pandas); `.iloc`
# selects by position explicitly.
print(physchem_txt.iloc[6])

Experimental LogP:
  4.072 Vitas-M STK709232


In [10]:
# Put the two per-compound text summaries side by side, aligned on the shared
# CSID index.
hazard_cols = DataFrame(
    {
        'EPI Suite Results': epi_res_txt,
        'Phys/Chem Properties': physchem_txt,
    },
    columns=['EPI Suite Results', 'Phys/Chem Properties'],
)
hazard_cols

Unnamed: 0_level_0,EPI Suite Results,Phys/Chem Properties
CSID,Unnamed: 1_level_1,Unnamed: 2_level_1
23140,Ready Biodegradability Prediction: YES\nLog BC...,
8677,,Experimental Melting Point:\n 206 °C Alfa Aes...
5989,,Experimental Solubility:\n Soluble to 3000 mM...
74268,Ready Biodegradability Prediction: YES\nLog BC...,
13835224,Ready Biodegradability Prediction: YES\nLog BC...,Experimental LogP:\n -1.341 Vitas-M STL146584...
733,Ready Biodegradability Prediction: YES\nLog BC...,Experimental Solubility:\n Miscible NIOSH MA8...
199342,Ready Biodegradability Prediction: NO\nLog BCF...,Experimental LogP:\n 4.072 Vitas-M STK709232
28950,Ready Biodegradability Prediction: YES\nLog BC...,
8060,Ready Biodegradability Prediction: NO\nLog BCF...,Experimental Boiling Point:\n 190 deg C / 14 ...
13850135,Log Kow (KOWWIN v1.67 estimate): 3.56\nLog Kow...,Experimental LogP:\n 3.382 Vitas-M STK085542\...


In [11]:
# Write the combined text columns to an Excel workbook, path relative to the
# notebook's working directory.
# NOTE(review): presumably the ../results directory must already exist — confirm.
hazard_cols.to_excel('../results/cs_epi_props.xlsx')