In [1]:
import sys
sys.path.append('..')
import chemex as cx
import chemex.web
from itertools import islice
import pandas as pd
from pandas import DataFrame

For an explanation of what is going on here, see [this notebook](https://github.com/akokai/chemex/blob/master/notebooks/Scraping%20properties%20from%20ChemSpider.ipynb).

In [2]:
# Paste ChemSpider IDs from a spreadsheet into this string:
csids_col = '''
23140
8677
5989
74268
13835224
733
199342
28950
8060
13850135
13849989
1267362
20939
13849981
6313
592
3
23237
13848467
4938996
36393
16567
4572358
142476
13848341
13865630
63149
76204
''' # end CSIDs

# Split on any whitespace so that blank lines, trailing spaces and Windows
# line endings picked up while pasting do not end up as bogus IDs.
csids = csids_col.split()

# Scrape a whole bunch of ChemSpider pages.
multi_data = cx.web.cs_properties_gen(csids, cx.web.cs_default_props)
# islice(..., None) takes every item; put an integer in place of None to
# process only the first few IDs while testing.
datalist = list(islice(multi_data, None))

In [3]:
# Show the raw scraped records as a table whose rows are labelled with the
# ChemSpider IDs that were requested.
pd.DataFrame(data=datalist, index=csids)

Unnamed: 0,ACD/BCF (pH 7.4),ACD/Boiling Point,ACD/Flash Point,ACD/KOC (pH 7.4),ACD/LogP,ACD/Vapour Pressure,CSID,EPI Suite,Experimental Boiling Point,Experimental LogP,Experimental Melting Point,Experimental Solubility
23140,[3620.19],[304.4±10.0 °C at 760 mmHg],[74.9±6.8 °C],[12272.53],[5.05],[0.0±1.4 mmHg at 25°C],[23140],[Predicted data is generated using the US Envi...,,,,
8677,[],[],[],[],[],[],[8677],[None],,,"[206 °C Alfa Aesar, 204-207 °C Oxford Universi...",
5989,[],[],[],[],[],[],[5989],[None],,,[300 °C Alfa Aesar 45556],[Soluble to 3000 mM in water Tocris Bioscience...
74268,[143.78],[241.7±8.0 °C at 760 mmHg],[65.8±6.3 °C],[1219.24],[2.92],[0.0±1.1 mmHg at 25°C],[74268],[Predicted data is generated using the US Envi...,,,,
13835224,[1.00],[184.8±8.0 °C at 760 mmHg],[107.2±0.0 °C],[6.67],[-1.34],[0.2±0.8 mmHg at 25°C],[13835224],[Predicted data is generated using the US Envi...,"[186-188 °C Alfa Aesar, 188 °C Food and Agricu...",[-1.341 Vitas-M STL146584],"[-60 °C Alfa Aesar, -60 °C Oxford University C...","[Miscible with water, acetone, chloroform. So..."
733,[1.00],[290.0±0.0 °C at 760 mmHg],[160.0±0.0 °C],[2.73],[-2.32],[0.0±1.3 mmHg at 25°C],[733],[Predicted data is generated using the US Envi...,[182 deg C / 20 mm (335.1395 °C / 760 mmHg)\r\...,,"[18 °C Alfa Aesar, 17.8 °C Oxford University C...","[Miscible NIOSH MA8050000, Soluble to 1000 mM ..."
199342,[891.89],[277.7±9.0 °C at 760 mmHg],[120.9±10.2 °C],[4502.29],[4.07],[0.0±0.6 mmHg at 25°C],[199342],[Predicted data is generated using the US Envi...,,[4.072 Vitas-M STK709232],,
28950,[1738.05],[288.5±0.0 °C at 760 mmHg],[131.1±14.6 °C],[7258.27],[4.80],[0.0±0.6 mmHg at 25°C],[28950],[Predicted data is generated using the US Envi...,,,,
8060,[628.40],[320.0±0.0 °C at 760 mmHg],[146.4±13.7 °C],[3331.78],[4.01],[0.0±0.7 mmHg at 25°C],[8060],[Predicted data is generated using the US Envi...,[190 deg C / 14 mm (360.4696 °C / 760 mmHg)\r\...,,"[18-20 °C Alfa Aesar, 19 °C Jean-Claude Bradle...",
13850135,[300.14],[224.5±0.0 °C at 760 mmHg],[98.3±0.0 °C],[2064.79],[3.38],[0.0±0.9 mmHg at 25°C],[13850135],[Predicted data is generated using the US Envi...,"[221-222 °C Alfa Aesar, 225 °C Food and Agricu...",[3.382 Vitas-M STK085542],[77-83 °C (Literature) Indofine \r\n ...,


I want to condense some of this information back into blobs of text for a human-readable table.

In [4]:
def txt_simplified(omd, ignore_keys=('CSID', 'EPI Suite')):
    """Condense a scraped property record into one line of text per property.

    For every key of ``omd`` that is not in ``ignore_keys``, only the first
    value stored under that key is used, and of that value only its first
    line of text. Properties whose text comes out empty are left out.

    Parameters
    ----------
    omd : multi-valued mapping
        Must provide ``iterkeys()`` and ``getlist(key)``.
        NOTE(review): presumably a boltons ``OrderedMultiDict`` as produced
        by ``cx.web.cs_properties_gen`` -- confirm.
    ignore_keys : collection of str, optional
        Keys to leave out of the summary.

    Returns
    -------
    str
        ``'<key>: <text>'`` entries joined by newlines, in the order in
        which ``omd.iterkeys()`` yields the keys.
    """
    lines = []
    for key in omd.iterkeys():
        if key in ignore_keys:
            continue
        # Strip before splitting so that leading blank lines in the scraped
        # text do not make the property look empty and get it dropped.
        first_line = str(omd.getlist(key)[0]).strip().split('\n')[0].strip()
        if first_line:
            lines.append(key + ': ' + first_line)
    return '\n'.join(lines)

def epi_txt(omd):
    """Render the 'EPI Suite' entry of ``omd`` as newline-separated text.

    The value returned by ``omd.get('EPI Suite')`` is handed to
    ``cx.web.epi_suite_values``; every item of the mapping that comes back
    becomes one ``'<key>: <value>'`` line.
    """
    epi_values = cx.web.epi_suite_values(omd.get('EPI Suite'))
    return '\n'.join(
        str(name) + ': ' + str(value) for name, value in epi_values.items()
    )

In [5]:
# An example of what these functions do: both text summaries of one scraped
# record, separated by a dashed line.
print('\n'.join([txt_simplified(datalist[4]), '----', epi_txt(datalist[4])]))

Experimental Melting Point: -60 °C Alfa Aesar
Experimental Boiling Point: 186-188 °C Alfa Aesar
Experimental LogP: -1.341 Vitas-M STL146584
Experimental Solubility: Miscible with water, acetone, chloroform.  Soluble in ether Alfa Aesar 30948
ACD/Boiling Point: 184.8±8.0 °C at 760 mmHg
ACD/Vapour Pressure: 0.2±0.8 mmHg at 25°C
ACD/Flash Point: 107.2±0.0 °C
ACD/LogP: -1.34
ACD/BCF (pH 7.4): 1.00
ACD/KOC (pH 7.4): 6.67
----
Log Kow (KOWWIN v1.67 estimate): -0.78
Log Kow (Exper. database match): -0.92
Henrys LC [VP/WSol estimate using EPI values]: 1.370E-008 atm-m3/mole
Log Koa (KOAWIN v1.10 estimate): 4.228
Log Koa (experimental database): None
Ready Biodegradability Prediction: YES
Log BCF from regression-based method: 0.500 (BCF = 3.162)
Level III Fugacity Model: 
           Mass Amount    Half-Life    Emissions
            (percent)        (hr)       (kg/hr)
   Air       2.22            21.4         1000       
   Water     40              208          1000       
   Soil      57.7    

In [6]:
# One row per scraped record: its CSID plus the two condensed text blobs,
# with the CSID column used as the index.
out_df = pd.DataFrame(
    data=[
        (record.get('CSID'), epi_txt(record), txt_simplified(record))
        for record in datalist
    ],
    columns=('CSID', 'EPI Suite', 'Physical Properties'),
).set_index('CSID')
out_df

Unnamed: 0_level_0,EPI Suite,Physical Properties
CSID,Unnamed: 1_level_1,Unnamed: 2_level_1
23140,Log Kow (KOWWIN v1.67 estimate): 4.50\nHenrys ...,ACD/Boiling Point: 304.4±10.0 °C at 760 mmHg\n...
8677,,Experimental Melting Point: 206 °C Alfa Aesar
5989,,Experimental Melting Point: 300 °C Alfa Aesar ...
74268,Log Kow (KOWWIN v1.67 estimate): 2.53\nHenrys ...,ACD/Boiling Point: 241.7±8.0 °C at 760 mmHg\nA...
13835224,Log Kow (KOWWIN v1.67 estimate): -0.78\nLog Ko...,Experimental Melting Point: -60 °C Alfa Aesar\...
733,Log Kow (KOWWIN v1.67 estimate): -1.65\nLog Ko...,Experimental Melting Point: 18 °C Alfa Aesar\n...
199342,Log Kow (KOWWIN v1.67 estimate): 4.36\nHenrys ...,Experimental LogP: 4.072 Vitas-M STK709232\nAC...
28950,Log Kow (KOWWIN v1.67 estimate): 4.33\nHenrys ...,ACD/Boiling Point: 288.5±0.0 °C at 760 mmHg\nA...
8060,Log Kow (KOWWIN v1.67 estimate): 4.31\nHenrys ...,Experimental Melting Point: 18-20 °C Alfa Aesa...
13850135,Log Kow (KOWWIN v1.67 estimate): 3.56\nLog Kow...,Experimental Melting Point: 77-83 °C (Literatu...


In [7]:
# Save the condensed table as an Excel workbook.
out_df.to_excel(excel_writer='../results/cs_epi_props.xlsx')