Notebook to extract SMILES from a list of common organic solvents

In [48]:
from rdkit import Chem
import pandas as pd
from urllib.request import urlopen
from urllib.parse import quote

import ssl
# NOTE(review): this swaps the ssl module's default HTTPS context factory for the
# unverified one, so HTTPS requests made via urllib after this line presumably skip
# certificate verification for the whole kernel session. Likely a workaround for a
# local certificate problem -- confirm it is still needed; prefer fixing the CA bundle.
ssl._create_default_https_context = ssl._create_unverified_context

def convertNameToSmiles(chemical_name, timeout=10):
    """Resolve a chemical name to a SMILES string via the NCI Cactus resolver.

    Parameters
    ----------
    chemical_name : str
        Name to look up. Leading/trailing whitespace is ignored so that
        entries such as ``'dimethyl-formamide '`` are queried cleanly.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    str or bool
        The first whitespace-delimited token of the resolver's response, or
        ``False`` when the name is not a non-empty string, the request fails,
        or the response body is empty.
    """
    # Non-string cells (e.g. NaN from a blank CSV row) cannot be looked up.
    if not isinstance(chemical_name, str):
        return False

    cleaned_name = chemical_name.strip()
    if not cleaned_name:
        return False

    url = 'https://cactus.nci.nih.gov/chemical/structure/' + quote(cleaned_name) + '/smiles'

    try:
        # The context manager closes the connection even if read/decode fails.
        with urlopen(url, timeout=timeout) as response:
            answer = response.read().decode('utf8')
    except Exception:
        # Best-effort lookup: any network/HTTP/decoding failure maps to False.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt/SystemExit
        # propagate so a long batch of lookups can still be interrupted.
        return False

    # split() with no argument also discards trailing newlines and any
    # additional lines in the response, leaving only the first token.
    tokens = answer.split()
    return tokens[0] if tokens else False

In [49]:
# Absolute path to the solvent table as used in this notebook.
# NOTE(review): machine-specific location -- adjust when running elsewhere.
SOLVENTS_CSV_PATH = '/home/warren/XChem_projects/car-django-react/Notebooks/In-data/common-organic-solvents-converted.csv'

# header=None: no row is consumed as column names, so columns get integer labels.
solvents_pd = pd.read_csv(SOLVENTS_CSV_PATH, header=None)

In [50]:
solvents_pd

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,acetic acid,C2H4O2,60.052,118,16.6,1.0446,Miscible,6.2,39
1,acetone,C3H6O,58.079,56.05,-94.7,0.7845,Miscible,21.01,-20
2,acetonitrile,C2H3N,41.052,81.65,-43.8,0.7857,Miscible,36.64,6
3,benzene,C6H6,78.11,80.1,5.5,0.8765,0.18,2.28,-11
4,1-butanol,C4H10O,74.12,117.7,-88.6,0.8095,6.3,17.8,37
5,2-butanol,C4H10O,74.12,99.5,-88.5,0.8063,15,17.26,24
6,2-butanone,C4H8O,72.11,79.6,-86.6,0.7999,25.6,18.6,-9
7,t-butyl alcohol,C4H10O,74.12,82.4,25.7,0.7887,Miscible,12.5,11
8,carbon tetrachloride,CCl4,153.82,76.8,-22.6,1.594,0.08,2.24,--
9,chlorobenzene,C6H5Cl,112.56,131.7,-45.3,1.1058,0.05,5.69,28


In [51]:
# The first column of the table holds the solvent names.
first_column_label = solvents_pd.columns[0]
solvent_names = solvents_pd[first_column_label]

In [52]:
solvent_names

0                       acetic acid
1                           acetone
2                      acetonitrile
3                           benzene
4                         1-butanol
5                         2-butanol
6                        2-butanone
7                   t-butyl alcohol
8              carbon tetrachloride
9                     chlorobenzene
10                       chloroform
11                      cyclohexane
12               1,2-dichloroethane
13                diethylene glycol
14                    diethyl ether
15                   dimethyl ether
16             1,2-dimethoxy-ethane
17              dimethyl-formamide 
18               dimethyl sulfoxide
19                      1,4-dioxane
20                          ethanol
21                    ethyl acetate
22                  ethylene glycol
23                         glycerin
24                          heptane
25          hexamethylphosphoramide
26    hexamethylphosphoroustriamide
27                          

In [56]:
# Use the Cactus resolver to map each solvent name to its SMILES string.
common_solvents = {}       # SMILES -> solvent name
unresolved_solvents = []   # names for which the resolver gave no SMILES

for name in solvent_names:
    smi = convertNameToSmiles(name)
    if not smi:
        # convertNameToSmiles returns False on failure. Storing that would add
        # a bogus `False` key that each later failure silently overwrites, so
        # record the name separately instead.
        unresolved_solvents.append(name)
        continue
    common_solvents[smi] = name

# Surface failed lookups instead of hiding them inside the dictionary.
if unresolved_solvents:
    print('No SMILES found for:', unresolved_solvents)

In [57]:
common_solvents

{'CC(O)=O': 'acetic acid',
 'CC(C)=O': 'acetone',
 'CC#N': 'acetonitrile',
 'c1ccccc1': 'benzene',
 'CCCCO': '1-butanol',
 'CCC(C)O': '2-butanol',
 'CCC(C)=O': '2-butanone',
 'CC(C)(C)O': 't-butyl alcohol',
 'ClC(Cl)(Cl)Cl': 'carbon tetrachloride',
 'Clc1ccccc1': 'chlorobenzene',
 'ClC(Cl)Cl': 'chloroform',
 'C1CCCCC1': 'cyclohexane',
 'ClCCCl': '1,2-dichloroethane',
 'OCCOCCO': 'diethylene glycol',
 'CCOCC': 'diethyl ether',
 'COC': 'dimethyl ether',
 'COCCOC': '1,2-dimethoxy-ethane',
 'CN(C)C=O': 'dimethyl-formamide ',
 'C[S](C)=O': 'dimethyl sulfoxide',
 'C1COCCO1': '1,4-dioxane',
 'CCO': 'ethanol',
 'CCOC(C)=O': 'ethyl acetate',
 'OCCO': 'ethylene glycol',
 'OCC(O)CO': 'glycerin',
 'CCCCCCC': 'heptane',
 'CN(C)[P](=O)(N(C)C)N(C)C': 'hexamethylphosphoramide',
 'CN(C)P(N(C)C)N(C)C': 'hexamethylphosphoroustriamide',
 'CCCCCC': 'hexane',
 'CO': 'methanol',
 'COC(C)(C)C': 'methyl t-butyl ether',
 'ClCCl': 'methylene chloride',
 'CN1CCCC1=O': 'N-methyl-2-pyrrolidinone',
 'C[N+]([O-])=O':