In [7]:
import numpy as np
import pandas as pd
from pandas import DataFrame as df
from openeye.oechem import *
from openeye.oeiupac import *

In [17]:
#Returns True if inputted function is a float
def is_number(s):
    if type(s) == tuple:
        return False
    try:
        float(s)
        return True
    except ValueError:
        return False
    
#Returns index of any values in a column that are not floats
def check_value(column):
    problem_index = []
    for i,entry in enumerate(column):
        if is_number(entry) == False:
            problem_index.append(i)
    return problem_index


#Cleans a column of temperature values 
#operations in the function are made by inspection
def clean_temp(value):
    if value[0].isdigit() == False: #fixes terms with characters out in front (i.e. .373.15)
        value = value[1:]
    value = value.replace(',','.')
    value = value.replace('l','1')
    value = value.replace('I','1')
    value = value.replace('i','1')
    value = value.replace(' ', '')
    value = value.replace('A', '.1')
    return value

#Cleans a column of measured values 
def clean_measure(value):
    value = value.replace(" ", "")
    value = value.replace(",", ".")
    value = value.replace('l', "1")
    value = value.replace('I', "1")
    value = value.replace("x10","e")
    value = value.replace("×10", "e")
    value = value.replace("s10", "e")
    value = value.replace("10s", "5")
    value = value.replace('es', "e5")
    value = value.replace('i', '1')
    value = value.replace('>', "")
    value = value.replace('¢', "c")
    value = value.replace('-.', "")
    value = value.replace('O', '0')
    return value

def findOccur(s, ch):
    return [i for i, letter in enumerate(s) if letter == ch]

#Finds the maximum value of a list of lists
def max_value(inputlist):
    maxs = []
    for sublist in inputlist:
        maxs.append(max(sublist))
    return max(maxs)

#Finds the minimum value of a list of lists
def min_value(inputlist):
    mins = []
    for sublist in inputlist:
        mins.append(min(sublist))
    return min(mins)


#If an entry has an uncertainty, return the 
def split_uncertain(value):
    chars = set('+-_±')
    all_index = []
    
    for c in chars:
        if len(findOccur(value,c)) == 0:
            pass
        else:
            all_index.append(findOccur(value,c))
            
    if len(all_index) == 0:
        return value
    
    else:   
        max_i = max_value(all_index)
        min_i = min_value(all_index)
    
        return value[:min_i], value[(max_i+1):]


def extractvalue(measured):
    if type(measured) == str:
        return measured
    else:
        return measured[0]

def extractuncertain(measured):
    if type(measured) == str:
        return '0'
    else:
        return measured[1]

def endswitha(value):
    if value[-1] == 'a':
        return True
    else:
        return False    

def endswithb(value):
    if value[-1] == 'b':
        return True
    else:
        return False

def endswithc(value):
    if value[-1] == 'c':
        return True
    else:
        return False
    
def removeend(string):
    return string[:-1]
    
def check_names(names):
    #Code adapted from Pharmsci 175/275
    bad_names = []
    for name in names:
    #Create a new OEMol
        mol_from_name = OEMol()
        if OEParseIUPACName(mol_from_name, name) == True:
            pass
        else:
            bad_names.append(name)
    return bad_names

def method():
    '''
    GCR: Gas-liquid chromatography with gas phase correction
    GC: Gas-liquid chromatography with no gas phase correction
    EBUL: Differential ebulliometry
    DP: Dew-point method
    NSGC: Non-steady-state gas chromatography
    RGC: Relative gas-liquid chromatography
    HS: Headspace chromatography
    GS: Gas-stripping
    STAT: Differential static cell equilibrium
    LLC: Liquid-liquid chromatography
    KHW: Hexadecane-water partition cofficient indirect method
    KGW: Estimation from gas-water partition coefficients from solubility measurements
    EXT: Extrapolated VLE
    '''
    return 

In [27]:
table = pd.read_csv('rawtable2.csv', delimiter = ',')
table = table.drop(columns = ['Unnamed: 4', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8'])
table = table[pd.notnull(table['Solvent'])]
table = table[pd.notnull(table['Temp'])]
table = table[pd.notnull(table['Measured'])]
table = table.reset_index(drop = True)

In [30]:
#Many solvent names had an additional footnote
table.loc[table.Solvent.apply(endswitha) == True, 'Notes'] = 'This compound can also be found in Table 1'
table.loc[table.Solvent.apply(endswitha) == True, 'Solvent'] = table.Solvent.apply(removeend)


In [167]:
b= check_names(table.Solvent.unique()) #function from OpenEye package
table.loc[table.Solvent == b[101]]

Unnamed: 0,Solvent,Temp,Measured,Ref,Notes
348,Propyl butymte,290. 15,4454,9,


In [215]:
#Entries 98, 103 were purposely skipped
table.Solvent = table.Solvent.replace({
    b[0]: 'Isobutane', b[1]: '2-Methylhexane', b[2]: 'Cycloheptane', 
    b[3]: 'Methylcyclohexane', b[4]: 'Cyclooctane', b[5]: 'Cis-1,2-dimethyl cyclohexane',
    b[6]: 'Pentylcyclopentane', b[7]: '2-Methylpropene', b[8]: '3-Methyl-1-butene',
    b[9]: '2-Methyl-2-butene', b[10]: '2-Methyl-1-pentene', b[11]: '4-Methyl-1-pentene',
    b[12]: '11~{H}-benzo[b]fluorene', b[13]: 'Benzo[a]anthracene', b[14]: '7,12-dimethylbenzo[a]anthracene',
    b[15]: 'Benzo[a]pyrene', b[16]: '3,4-Benzo[a]pyrene', b[17]: '3-Methylcholanthrene',
    b[18]: '1,2,3-Trimethylbenzene', b[19]: 'Trichlorofluoromethane', b[20]: 'Dichlorodifluoromethane', 
    b[21]: 'Chlorotrifluoromethane', b[22]: 'Nitrotrichloromethane', b[23]: 'Dichlorofluoromethane', 
    b[24]: 'Chlorodifuloromethane', b[25]: 'Difluoromethane', b[26]: 'Chloromethane', 
    b[27]: 'Hexachloroethane', b[28]: 'Chloropentafluoroethane', b[29]: 'Tetrafluoroethene', 
    b[30]: 'Pentachloroethane', b[31]: '1-Bromo-2-chloroethane', b[32]: 'Cis-1,2-Dichloroethene', 
    b[33]: '3-Chloro-1-propene', b[34]: '1,2,3-Trichloropropane', b[35]: '1,2-Dibromopropane', 
    b[36]: '1,3-Dichloropropane', b[37]: '2-Iodopropane', b[38]: 'Octafluorocyclobutane', 
    b[39]: '1-Chloro-2-methylpropane', b[40]: 'Pentachloroethane', b[41]: '1-Bromo-2-chloroethane', 
    b[42]: 'Cis-1,2-dichloroethene', b[43]: '3-Chloro-1-propene', b[44]: '1,2,3-Trichloropropane', 
    b[45]: '1,2-Dibromopropane', b[46]: '1,3-Dichloropropane', b[47]: '2-Iodopropane', 
    b[48]: 'Octafluorocyclobutane', b[49]: '1-Chloro-2-methylpropane', b[50]: '2-Chloro-2-methylbutane', 
    b[51]: 'm-Dichlorobenzene', b[52]: 'o-Dichlorobenzene', b[53]: '1,2,3-Trichlorobenzene', 
    b[54]: '1,2,4-Trichlorobenzene', b[55]: '1,2,3,4-Tetrachlorobenzene', b[56]: '1,2,4,5-Tetrachlorobenzene', 
    b[57]: 'Pentachlorobenzene', b[58]: 'm-Difluorobenzene', b[59]: 'p-Difluorobenzene', 
    b[60]: '1-Pentanol', b[61]: '2-Methyl-1-butanol', b[62]: '2,2-Dimethyl-1-propanol', 
    b[63]: '3-Hexanol', b[64]: '2-Methyl-2-pentanol', b[65]: '2-Methyl-3-pentanol', 
    b[66]: '4-Methyl-2-pentanol', b[67]: '3-Methyl-2-pentanol', b[68]: '3-Methyl-3-pentanol', 
    b[69]: '2,2-Dimethyl-1-butanol', b[70]: '2,3-Dimethyl-2-butanol', b[71]: '3,3-Dimethyl-2-butanol', 
    b[72]: '2-Methyl-4-pentene-3-ol', b[73]: '2-Methyl-2-hexanol', b[74]: '3-Methyl-3-hexanol', 
    b[75]: '2,3-Dimethyl-2-pentanol', b[76]: '2,4-Dimethyl-2-pentanol', b[77]: '2,2-Dimethyl-3-pentanol', 
    b[78]: '2,3-Dimethyl-3-pentanol', b[79]: '2,4-Dimethyl-3-pentanol', b[80]: '3-Ethyl-3-Pentanol', 
    b[81]: '2,2,3-Trimethyl-3-pentanol', b[82]: '1-Nonanol', b[83]: '1-Dodecanol', 
    b[84]: '1-Tetradecanol', b[85]: '1-Hexadecanol', b[86]: '1-Heptadecanol', 
    b[87]: '1,3-Nonanediol', b[88]: '1,4-Dimethyl-2,4-octanediol', b[89]: '2,4-Dimethyl-2,4-nonanediol', 
    b[90]: '2-Propyl-1,3-heptanediol', b[91]: '2-Methyl-3-pentanone', b[92]: '3-Methyl-2-pentanone', 
    b[93]: '4-Methyl-2-pentanone', b[94]: '3,3-Dimethyl-2-butanone', b[95]: '2,4-Dimethyl-3-pentanone', 
    b[96]: '5-Methyl-2-hexanone', b[97]: '2,6-Dimethyl-4-heptanone', 
    b[99]: 'Ethyl propenoate', b[100]: 'Ethyl propionate', b[101]: 'Propyl butyrate', 
    b[102]: 'Isopropyl butyrate', b[103]: '~{tert}-butyl hypochlorite', b[104]: 'Sec-butyl methyl ether', 
    b[105]: 'Sec-butyl methyl, 2-methyl ether', b[106]: '1-propan-2-yloxypropane', b[107]: 'Nonanal', 
    b[108]: '1-Ethyl piperidine', b[109]: '3-Ethyl-4-methylpyridine', b[110]: '1-Propyl piperidine', 
    b[111]: '2-Amino toluene', b[112]: '2-Nitropropane', b[113]: '2-Nitro-1-methoxybenzene', 
    b[114]: 'Carbon bisulfide'
    
    
})

In [226]:
table.loc[table.Solvent == b[18]]

Unnamed: 0,Solvent,Temp,Measured,Ref,Notes
142,"1,2,3-Tdmethylbenzene",288.15,1.115x105,7,
143,"1,2,3-Tdmethylbenzene",298.15,88700,912,
144,"1,2,3-Tdmethylbenzene",298.15,1. 065 x105,7,
145,"1,2,3-Tdmethylbenzene",308.15,9 . 2 5 0 x10 ~,7,
146,"1,2,3-Tdmethylbenzene",318.15,7.836x104,7,


In [222]:
bad_names = check_names(table.Solvent.unique())
bad_names

['2-Metyl-1-pentene',
 '1,2,3-Tdmethylbenzene',
 'Chlorodifuloromethane',
 'Acetic Acid, ethenyl ester',
 'Sec-butyl methyl, 2-methyl ether']

In [227]:
mol_from_name = OEMol()
name = '1,2,3-Trimethylbenzene'
OEParseIUPACName(mol_from_name, name)

True

In [218]:
table.loc[table.Solvent == '2-Metyl-1-pentene']

Unnamed: 0,Solvent,Temp,Measured,Ref,Notes
75,2-Metyl-1-pentene,298.15,5 . 9 9x104,5,


In [220]:
b

['iso-Butane',
 '2-Methyhexane',
 '[[Cycloheptane',
 'I[Methycyclohexane',
 ']lCyclooctane',
 '1,2-Dimethyl cyclohexane(cis)',
 'I Pentylcyclopentane',
 '2-Methyl, propene',
 '3-Methyl,1-butene',
 '2-Methyl,2-butene',
 '2-Methyl,l-pentene',
 '4-Methyl,l-pentene',
 '2,3-Dimethyl, 1-butene',
 '2-Heplene',
 '!2-Methyl,1,3-butadiene',
 'l-Pentyne',
 '1,8-Nonanediyne',
 'p- Methyl styrene',
 '1,2,3-Tdmethylbenzene',
 'Te~-butylbenzene',
 '1-Ethylnaphthalene Biphenyl',
 '1,2-Benzofluorene',
 '2,3-Benzofluorene',
 '1,2-Benzanthracene',
 '7,12-Dimethyl- 1,2-benzanthracene',
 'Benzo[alpyrene',
 '3,4-Benzopyrene',
 ':3- Methyleholanthrene',
 'Benzo[g,h,i]perylene',
 'Trichlomfluoromethane',
 'Dichlomdifluommethane',
 'Chlorotrifluromethane',
 'Nitrotriehlrt mmethane',
 'Dichlomfluoromethane',
 'Chlomdifluoromethane',
 'Diflourtrometharte',
 'Chlommethane',
 'ltexachlomethane',
 'Chlompentafluoroethane',
 'Tetrafluomethene',
 'Pentaehloroethane',
 '1-Bromo, 2-chloroethane',
 'Cis 1,2-Dichloroethe