In [198]:
import tabula
from thermo import ChemicalConstantsPackage
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [199]:
file = 'GPA2145-16.pdf'

In [200]:
def split_pressure(row):
    if pd.isnull(row):
        return np.nan, np.nan
    parts = row.split()
    if len(parts) == 1:
        if row.startswith("35000"):
            return np.nan, "35000."
        elif row.startswith("3840"):
            return "3840.", np.nan
        else:
            return np.nan, parts[0]
    return parts[0], parts[1]

# Molar Mass, Boiling T, Crits, Vapor P, Acentric

In [201]:
dfs1 = tabula.read_pdf(file, stream=True, pages=list(np.arange(8, 14, 1)), area=(21, 5, 92, 100), relative_area=True)
dfs2 = tabula.read_pdf(file, stream=True, pages=list(np.arange(8, 14, 1)), area=(20, 5, 92, 100), relative_area=True)

dfs = [dfs1[0]] + dfs2[1: ]

In [202]:
df_new1 = []
for df in dfs:
    new_names = ['Order', 'Compound', 'Formula', 'Molar Mass [g/mol]', 'Boiling T. [F]', 
                 'Triple Point T. [F]', 'Vapor P. @60F [psia]', 'Vapor P. @100F [psia]', 'Crit T. [F]', 'Crit. P. [psia]',
                'Density [lbm/ft^3]', 'h']
    df.columns = new_names + df.columns[len(new_names):].tolist()
    df_new1.append(df)
df_new1 = pd.concat(df_new1)
df_new1.head()

Unnamed: 0,Order,Compound,Formula,Molar Mass [g/mol],Boiling T. [F],Triple Point T. [F],Vapor P. @60F [psia],Vapor P. @100F [psia],Crit T. [F],Crit. P. [psia],Density [lbm/ft^3],h
0,1,methane,CH4,16.0425,-258.66,-296.42,,5000.0,-116.66,667.1,10.154,0.0114
1,2,ethane,C2H6,30.069,-127.44,-297.01,495.62,800.0,89.91,706.7,12.871,0.0995
2,3,propane,C3H8,44.0956,-43.8,-305.72,107.71,188.62,206.13,616.6,13.764,0.1521
3,4,isobutane,C4H10,58.1222,10.85,-254.96,38.225,72.644,274.39,526.3,14.078,0.1835
4,5,n-butane,C4H10,58.1222,31.12,-216.85,26.027,51.567,305.56,550.6,14.078,0.2008


In [203]:
df_new1.shape

(203, 12)

# Densities

In [204]:
dfs1 = tabula.read_pdf(file, stream=True, pages=list(np.arange(14, 20, 1)), area=(23, 5, 92, 95), relative_area=True)
df1 = dfs[0]
new_names = ['Order', 'Compound', 'Liq. Relative Density @60F:1atm', '', 'API Gravity @60F:1atm', '',
             'Desntiy of Liquid @60F:1atm [lbm/gal]', 'T. Coef. of Density @60F:sat [1/F]', 
             'Ideal Gas Relative Density @60F:1atm', 'Ideal Gas Volume @60F:1atm [ft^3/lbm]',
             'Ideal Gas Density @60F:1atm [lbm/MSCF]', '', '', '']
df1.columns = new_names + df1.columns[len(new_names):].tolist()
df1 = df1.drop('', axis=1)
df1.head()

ValueError: Length mismatch: Expected axis has 12 elements, new values have 14 elements

In [None]:
dfs2 = tabula.read_pdf(file, stream=True, pages=list(np.arange(14, 20, 1)), area=(23, 5, 92, 95), relative_area=True)

dfs_new2 = []
for i, df in enumerate(dfs2):
    if i == 0: continue
    new_names = ['Order', 'Compound', 'Liq. Relative Density @60F:1atm', 'API Gravity @60F:1atm',
                 'Desntiy of Liquid @60F:1atm [lbm/gal]', 'T. Coef. of Density @60F:sat [1/F]', 
                 'Ideal Gas Relative Density @60F:1atm', 'Ideal Gas Volume @60F:1atm [ft^3/lbm]',
                 'Ideal Gas Density @60F:1atm [lbm/MSCF]', '', '', '']
    df.columns = new_names + df.columns[len(new_names):].tolist()
    df = df.drop('', axis=1)
    df.head()
    dfs_new2.append(df)
    
df_new2 = pd.concat([df1] + dfs_new2)
df_new2.head()

In [None]:
df_new2.shape

# summation factor, z-factor, gross, net heating values

In [None]:
dfs1 = tabula.read_pdf(file, stream=True, pages=list(np.arange(20, 26, 1)), area=(22, 5, 92, 100), relative_area=True)
dfs2 = tabula.read_pdf(file, stream=True, pages=list(np.arange(20, 26, 1)), area=(20, 5, 92, 100), relative_area=True)


dfs = [dfs1[0]] + dfs2[1:]

In [None]:
df_new3 = []
for df in dfs:
    new_names = ['Order', 'Compound', 'Summation Factor: z=1-P*b^2 @60F [1/psia^0.5]', 
                 'Summation Factor: z=1-P/P0*b^2, P0=1atm @60F', 'Compressibility Factor @60F',
                'Gross Heating Value Liquid [Btu/lbm]', 'Gross Heating Value Liquid [Btu/gal]',
                'Gross Heating Value Ideal Gas [Btu/lbm]', 'Gross Heating Value Ideal Gas [Btu/ft^3]',
                'Gross Heating Value Ideal Gas [Btu/gal]', 
                'Net Heating Value Liquid [Btu/lbm]', 'Net Heating Value Ideal Gas [Btu/ft^3]']
    df.columns = new_names + df.columns[len(new_names):].tolist()
    df_new3.append(df)
    
df_new3 = pd.concat(df_new3)
df_new3.head()

In [None]:
df_new3.shape

#  Air required to burn, Hvap, Specific Heat, Refractive Index, 

In [None]:
dfs1 = tabula.read_pdf(file, stream=True, pages=list(np.arange(26, 32, 1)), area=(25, 5, 92, 100), relative_area=True)
dfs2 = tabula.read_pdf(file, stream=True, pages=list(np.arange(26, 32, 1)), area=(24, 5, 92, 100), relative_area=True)

dfs = [dfs1[0]] + dfs2[1:]

df_new4 = []
for df in dfs:
    new_names = ['Order', 'Compound', 'Volume of Air Required to Burn One Vol. of Ideal Gas', 
                 'Heat of Vaporization @1atm [Btu/lbm]', 
                 'Specific Heat Cp, ideal gas @15C [Btu/(lbm*F)]',
                 'Specific Heat Cv, ideal gas @15C [Btu/(lbm*F)]',
                 'Specific Heat Csat., liquid @15C [Btu/(lbm*F)]',
                 'k=Cp/Cv', 'Refractive Index, nD @15C', 
                 'Flammability Limits Lower @100F,1atm [volume % in air]',
                 'Octane Number - Motor Method D-357', 'Octane Number - Research Method D-908',
                ]
    df.columns = new_names + df.columns[len(new_names):].tolist()
    
    df[['Flammability Limits Lower @100F,1atm [volume % in air]', 'Flammability Limits Upper @100F,1atm [volume % in air]']] = df.apply(
        lambda row: split_pressure(row['Flammability Limits Lower @100F,1atm [volume % in air]']), axis=1, result_type="expand"
    )
    cols = list(df)
    cols.insert(10, cols.pop(cols.index('Flammability Limits Upper @100F,1atm [volume % in air]')))
    df = df.loc[:, cols]
    df_new4.append(df)
    
df_new4 = pd.concat(df_new4)
df_new4 = df_new4.drop('-.4', axis=1)

In [None]:
df_new4.head()

In [205]:
df_new4.shape

(203, 13)

# Merge

In [206]:
from functools import reduce


dfs = [df_new1, df_new2, df_new3, df_new4]

def drop_compound_if_not_first(df, first=True):
    if not first:
        return df.drop('Compound', axis=1)
    return df

dfs = [drop_compound_if_not_first(df, i == 0) for i, df in enumerate(dfs)]
merged_df = reduce(lambda left, right: pd.merge(left, right, on='Order', how='outer'), dfs)

In [207]:
merged_df.shape

(203, 40)

In [208]:
merged_df.head()

Unnamed: 0,Order,Compound,Formula,Molar Mass [g/mol],Boiling T. [F],Triple Point T. [F],Vapor P. @60F [psia],Vapor P. @100F [psia],Crit T. [F],Crit. P. [psia],Density [lbm/ft^3],h,Liq. Relative Density @60F:1atm,API Gravity @60F:1atm,Desntiy of Liquid @60F:1atm [lbm/gal],T. Coef. of Density @60F:sat [1/F],Ideal Gas Relative Density @60F:1atm,Ideal Gas Volume @60F:1atm [ft^3/lbm],Ideal Gas Density @60F:1atm [lbm/MSCF],Summation Factor: z=1-P*b^2 @60F [1/psia^0.5],"Summation Factor: z=1-P/P0*b^2, P0=1atm @60F",Compressibility Factor @60F,Gross Heating Value Liquid [Btu/lbm],Gross Heating Value Liquid [Btu/gal],Gross Heating Value Ideal Gas [Btu/lbm],Gross Heating Value Ideal Gas [Btu/ft^3],Gross Heating Value Ideal Gas [Btu/gal],Net Heating Value Liquid [Btu/lbm],Net Heating Value Ideal Gas [Btu/ft^3],Volume of Air Required to Burn One Vol. of Ideal Gas,Heat of Vaporization @1atm [Btu/lbm],"Specific Heat Cp, ideal gas @15C [Btu/(lbm*F)]","Specific Heat Cv, ideal gas @15C [Btu/(lbm*F)]","Specific Heat Csat., liquid @15C [Btu/(lbm*F)]",k=Cp/Cv,"Refractive Index, nD @15C","Flammability Limits Lower @100F,1atm [volume % in air]","Flammability Limits Upper @100F,1atm [volume % in air]",Octane Number - Motor Method D-357,Octane Number - Research Method D-908
0,1,methane,CH4,16.0425,-258.66,-296.42,,5000.0,-116.66,667.1,10.154,0.0114,0.3,340.0,2.5,,0.5539,23.6549,42.27,0.0116,0.04438,,,,23892.0,1010.0,59729.0,,909.4,9.552,219.6,0.5266,0.4028,,1.3073,1.00042,5.0,15.0,,
1,2,ethane,C2H6,30.069,-127.44,-297.01,495.62,800.0,89.91,706.7,12.871,0.0995,0.35628,265.66,2.9704,-0.00733,1.0382,12.6204,79.24,0.0238,0.0913,0.5788,22185.0,65897.0,22334.0,1769.7,66340.0,20281.0,1619.0,16.715,210.4,0.4079,0.3418,0.9664,1.1932,1.00072,2.9,13.0,0.05,1.6
2,3,propane,C3H8,44.0956,-43.8,-305.72,107.71,188.62,206.13,616.6,13.764,0.1521,0.50719,147.49,4.2285,-0.00167,1.5225,8.6059,116.2,0.0347,0.1329,0.8499,21491.0,90875.0,21654.0,2516.1,91563.0,19760.0,2315.0,23.879,183.0,0.3873,0.3423,0.6183,1.1316,1.00103,2.0,9.5,97.1,1.8
3,4,isobutane,C4H10,58.1222,10.85,-254.96,38.225,72.644,274.39,526.3,14.078,0.1835,0.56283,119.91,4.6925,-0.00121,2.0068,6.529,153.16,0.0441,0.169,0.9188,21081.0,98924.0,21232.0,3251.9,99630.0,19440.0,3000.0,31.043,157.0,0.3866,0.3525,0.5641,1.0969,1.00123,1.8,8.5,97.6,0.1
4,5,n-butane,C4H10,58.1222,31.12,-216.85,26.027,51.567,305.56,550.6,14.078,0.2008,0.5842,110.71,4.8706,-0.00108,2.0068,6.529,153.16,0.047,0.1801,0.9386,21137.0,102950.0,21300.0,3262.3,103740.0,19495.0,3011.0,31.043,165.8,0.3949,0.3608,0.5692,1.0947,1.00133,1.5,9.0,89.6,93.8


# GPA Compounds and CASs number matching

In [221]:
compounds = merged_df['Compound'].values

comp_dict = {comp: None for comp in compounds}
for comp in merged_df['Compound'].values:
    try:
        constants = ChemicalConstantsPackage.constants_from_IDs([comp])
        CAS = constants.CASs
        #print("'%s': '%s'" % (comp, CAS[0]))
    except:
        CAS = ['']
        if comp == 'n-heptane': CAS = ['142-82-5']
        if comp == '(Z)-2-pentene': CAS = ['627-20-3']
        if comp == '1,trans-2,cis-4-trimethylcyclopentane': CAS = ['16883-48-0']
        if comp == 'r-1,t-2,c-3-trimethylcyclopentane': CAS = ['15890-40-1']
        if comp == '1,cis-2,trans-4-trimethylcyclopentane': CAS = ['4850-28-6']
        if comp == 'r-1,c-2,t-3-trimethylcyclopentane': CAS = ['19374-46-0']
        if comp == '1,cis-2,cis-3-trimethylcyclopentane': CAS = ['2613-69-6']
        if comp == 'r-1,c-3,c-5-trimethylcyclohexane': CAS = ['1795-27-3']
        if comp == 'r-1,c-3,t-5-trimethylcyclohexane': CAS = ['1795-26-2']
        if comp == 'cis-1,2,trans-1,3-cyclohexane': CAS = ['7667-55-2']
        if comp == 'r-1,t-2,c-3-trimethylcyclohexane': CAS = ['1678-81-5']
            
        if comp == 'cis,cis-1,2,3-trimethylcyclohexane': CAS = ['1678-97-3']
        if comp == 'cis-1,2,cis-1,4-1,2,4-trimethylcyclohexane': CAS= ['2234-75-5']
        if comp == 'n-undecane': CAS = ['1120-21-4']
            
        #try:
        #    search_chemical(CAS[0])
        #except:
            # compound not found in the chemical library
            #print(comp, CAS[0], 'DELETED ----------------------------------------')
            #del comp_dict[comp]
        #    continue
            
        #if CAS[0] == '':  print('-------------')
        print("'%s': '%s'" % (comp, CAS[0]))
    comp_dict[comp] = CAS[0]

# delete compounds that I wasn't able to identify their CAS numbers. Mostly complex trimethylcyclohexane compounds
#comp_dict = {k: v for k, v in comp_dict.items() if v != ""}

#del comp_dict['trans-1,4-dimethylcyclohexane']
#del comp_dict['trans-1,3-dimethylcyclohexane']
#del comp_dict['trans-1-ethyl-3-methylcyclopentane']


'n-heptane': '142-82-5'
'(Z)-2-pentene': '627-20-3'
'1,trans-2,cis-4-trimethylcyclopentane': '16883-48-0'
'r-1,t-2,c-3-trimethylcyclopentane': '15890-40-1'
'1,cis-2,trans-4-trimethylcyclopentane': '4850-28-6'
'r-1,c-2,t-3-trimethylcyclopentane': '19374-46-0'
'1,cis-2,cis-3-trimethylcyclopentane': '2613-69-6'
'r-1,c-3,c-5-trimethylcyclohexane': '1795-27-3'
'r-1,c-3,t-5-trimethylcyclohexane': '1795-26-2'
'trans,trans-1,2,4-trimethylcyclohexane': ''
'cis-1,2,trans-1,4-1,2,4-trimethylcyclohexane': ''
'r-1,t-2,c-3-trimethylcyclohexane': '1678-81-5'
'r-1,t-2,t-4-trimethylcyclohexane': ''
'cis-1,2,cis-1,4-1,2,4-trimethylcyclohexane': '2234-75-5'
'cis,cis-1,2,3-trimethylcyclohexane': '1678-97-3'
'cis-1,2,trans-1,3-cyclohexane': '7667-55-2'
'n-undecane': '1120-21-4'


In [222]:
len(set(comp_dict.keys()))

203

In [223]:
len(set(comp_dict.values()))

198

In [224]:
comp_df = pd.DataFrame(list(comp_dict.items()), columns=['Compound', 'CAS'])
final = pd.merge(merged_df, comp_df, on='Compound', how='inner')
cols = list(final)
cols.insert(2, cols.pop(cols.index('CAS')))
final = final.loc[:, cols]

In [225]:
cols = list(final)
cols.insert(2, cols.pop(cols.index('CAS')))
final = final.loc[:, cols]
final

Unnamed: 0,Order,Compound,CAS,Formula,Molar Mass [g/mol],Boiling T. [F],Triple Point T. [F],Vapor P. @60F [psia],Vapor P. @100F [psia],Crit T. [F],Crit. P. [psia],Density [lbm/ft^3],h,Liq. Relative Density @60F:1atm,API Gravity @60F:1atm,Desntiy of Liquid @60F:1atm [lbm/gal],T. Coef. of Density @60F:sat [1/F],Ideal Gas Relative Density @60F:1atm,Ideal Gas Volume @60F:1atm [ft^3/lbm],Ideal Gas Density @60F:1atm [lbm/MSCF],Summation Factor: z=1-P*b^2 @60F [1/psia^0.5],"Summation Factor: z=1-P/P0*b^2, P0=1atm @60F",Compressibility Factor @60F,Gross Heating Value Liquid [Btu/lbm],Gross Heating Value Liquid [Btu/gal],Gross Heating Value Ideal Gas [Btu/lbm],Gross Heating Value Ideal Gas [Btu/ft^3],Gross Heating Value Ideal Gas [Btu/gal],Net Heating Value Liquid [Btu/lbm],Net Heating Value Ideal Gas [Btu/ft^3],Volume of Air Required to Burn One Vol. of Ideal Gas,Heat of Vaporization @1atm [Btu/lbm],"Specific Heat Cp, ideal gas @15C [Btu/(lbm*F)]","Specific Heat Cv, ideal gas @15C [Btu/(lbm*F)]","Specific Heat Csat., liquid @15C [Btu/(lbm*F)]",k=Cp/Cv,"Refractive Index, nD @15C","Flammability Limits Lower @100F,1atm [volume % in air]","Flammability Limits Upper @100F,1atm [volume % in air]",Octane Number - Motor Method D-357,Octane Number - Research Method D-908
0,1,methane,74-82-8,CH4,16.0425,-258.66,-296.42,,5000.0,-116.66,667.1,10.154,0.0114,0.3,340.0,2.5,,0.5539,23.6549,42.27,0.0116,0.04438,,,,23892.0,1010.0,59729.0,,909.4,9.552,219.6,0.5266,0.4028,,1.3073,1.00042,5.0,15.0,,
1,2,ethane,74-84-0,C2H6,30.069,-127.44,-297.01,495.62,800.0,89.91,706.7,12.871,0.0995,0.35628,265.66,2.9704,-0.00733,1.0382,12.6204,79.24,0.0238,0.0913,0.5788,22185.0,65897.0,22334.0,1769.7,66340.0,20281.0,1619.0,16.715,210.4,0.4079,0.3418,0.9664,1.1932,1.00072,2.9,13.0,0.05,1.6
2,3,propane,74-98-6,C3H8,44.0956,-43.8,-305.72,107.71,188.62,206.13,616.6,13.764,0.1521,0.50719,147.49,4.2285,-0.00167,1.5225,8.6059,116.2,0.0347,0.1329,0.8499,21491.0,90875.0,21654.0,2516.1,91563.0,19760.0,2315.0,23.879,183.0,0.3873,0.3423,0.6183,1.1316,1.00103,2.0,9.5,97.1,1.8
3,4,isobutane,75-28-5,C4H10,58.1222,10.85,-254.96,38.225,72.644,274.39,526.3,14.078,0.1835,0.56283,119.91,4.6925,-0.00121,2.0068,6.529,153.16,0.0441,0.169,0.9188,21081.0,98924.0,21232.0,3251.9,99630.0,19440.0,3000.0,31.043,157.0,0.3866,0.3525,0.5641,1.0969,1.00123,1.8,8.5,97.6,0.1
4,5,n-butane,106-97-8,C4H10,58.1222,31.12,-216.85,26.027,51.567,305.56,550.6,14.078,0.2008,0.5842,110.71,4.8706,-0.00108,2.0068,6.529,153.16,0.047,0.1801,0.9386,21137.0,102950.0,21300.0,3262.3,103740.0,19495.0,3011.0,31.043,165.8,0.3949,0.3608,0.5692,1.0947,1.00133,1.5,9.0,89.6,93.8
5,6,isopentane,78-78-4,C5H12,72.1488,82.09,-256.9,9.417,20.474,368.96,489.9,14.733,0.2274,0.62514,94.848,5.212,-0.00089,2.4911,5.2597,190.12,0.0576,0.2206,0.9677,20891.0,108880.0,21044.0,4000.9,109680.0,19303.0,3699.0,38.206,147.6,0.3828,0.3553,0.5311,1.0775,1.35639,1.3,8.0,90.3,92.3
6,7,n-pentane,109-66-0,C5H12,72.1488,96.91,-201.42,6.888,15.576,385.79,488.8,14.483,0.2515,0.63071,92.85,5.2584,-0.00086,2.4911,5.2597,190.12,0.0606,0.2323,0.974,20923.0,110020.0,21085.0,4008.7,110870.0,19335.0,3707.0,38.206,153.7,0.3879,0.3603,0.5427,1.0764,1.35981,1.4,8.3,62.6,61.7
7,8,n-hexane,110-54-3,C6H14,86.1754,155.68,-139.58,1.914,4.961,453.8,436.9,14.557,0.2986,0.66406,81.582,5.5364,-0.00076,2.9754,4.4036,227.09,0.0776,0.2973,0.9884,20783.0,115060.0,20943.0,4755.9,115950.0,19233.0,4404.0,45.37,144.0,0.3857,0.3626,0.5285,1.0635,1.37673,1.1,7.7,26.0,24.8
8,9,n-heptane,142-82-5,C7H16,100.2019,209.08,-131.08,0.5413,1.619,512.56,396.8,14.483,0.3494,0.68823,74.101,5.7379,-0.00068,3.4597,3.7872,264.05,0.0951,0.3646,0.9951,20680.0,118660.0,20839.0,5502.6,119570.0,19155.0,5100.0,52.534,136.2,0.3841,0.3643,0.5268,1.0544,1.38995,1.0,7.0,,
9,10,n-octane,111-65-9,C8H18,114.2285,258.12,-70.2,0.1546,0.5349,564.22,360.7,14.664,0.3971,0.70655,68.768,5.8907,-0.00063,3.944,3.3221,301.01,0.1128,0.4326,0.998,20601.0,121350.0,20760.0,6249.0,122290.0,19096.0,5796.0,59.697,129.9,0.3829,0.3655,0.524,1.0476,1.3996,0.8,6.5,,


In [226]:
len(set(final['Compound']))

203

In [227]:
len(set(final['CAS']))

198

In [228]:
export_name = 'GPA 2145-16 Compound Properties Table - English'
#final.to_csv('%s.csv' % export_name, index=False)
final.to_pickle("%s.pkl" % export_name)  
final.to_excel("%s.xlsx" % export_name, index=False)