In [7]:
import tabula
from thermo import ChemicalConstantsPackage
import numpy as np
import pandas as pd
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None
# Do not truncate displayed DataFrames: show every column and every row.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [8]:
# PDF that the tabula.read_pdf calls in this notebook parse.
file = "GPA2145-16.pdf"

# Molar Mass, Boiling Temperature, Critical Properties, Vapor Pressure, Acentric Factor

In [9]:
# Pages 35 through 40 (np.arange's stop value, 41, is exclusive).
page_numbers = list(np.arange(35, 41, 1))
dfs = tabula.read_pdf(file, pages=page_numbers, stream=True)
# One parsed table per entry.
print(len(dfs))

6


In [10]:
def split_pressure(row):
    """Split one table cell holding one or two whitespace-separated values.

    Despite the name, this helper is applied to every column in the notebook
    where two neighbouring values ended up in a single parsed cell, not only
    to the vapor-pressure column.

    Parameters
    ----------
    row : str, number or missing value
        The cell content. Missing values (anything ``pd.isnull`` accepts as
        null) and blank strings yield two NaNs. Non-string values are
        converted with ``str()`` before splitting.

    Returns
    -------
    tuple
        ``(first, second)``. With two or more tokens the first two tokens are
        returned as strings (further tokens are ignored). With a single token
        the value goes to the *second* slot and the first is NaN, except for
        the hard-coded special cases below.

    Notes
    -----
    NOTE(review): the "35000" / "3840" special cases are applied regardless of
    which column is being split; confirm they cannot match a lone value in the
    other columns this helper is used on.
    """
    if pd.isnull(row):
        return np.nan, np.nan

    # A cell may arrive as a number instead of text; normalise it so that
    # .split() / .startswith() below cannot raise AttributeError.
    text = row if isinstance(row, str) else str(row)
    parts = text.split()

    # Blank or whitespace-only cell: treat it like a missing value instead of
    # failing with IndexError on parts[0].
    if not parts:
        return np.nan, np.nan

    if len(parts) == 1:
        # Hard-coded placement of two specific lone values:
        # "35000..." belongs to the second column, "3840..." to the first.
        if text.startswith("35000"):
            return np.nan, "35000."
        if text.startswith("3840"):
            return "3840.", np.nan
        # Any other lone value is assigned to the second column.
        return np.nan, parts[0]

    return parts[0], parts[1]

In [11]:
# Names for the first ten columns tabula parsed from each page; any further
# parsed columns keep the names tabula gave them.
new_names = ['Order', 'Compound', 'Formula', 'Molar Mass [g/mol]', 'Boiling T. [K]', '--',
             'Triple Point T. [K]', 'Vapor P. @15C [kPa]', 'Crit T. [K]', 'Crit. P. [kPa]']

df_new1_parts = []
for df in dfs:
    df.columns = new_names + df.columns[len(new_names):].tolist()
    # Skip the first five parsed rows (presumably header residue -- confirm
    # against the PDF). .copy() gives an independent frame, so the column
    # assignments below never write into a slice of the parsed table.
    df = df.iloc[5:, :].copy()

    # The 'Vapor P. @15C' cell can hold two values: split it into the 15C and
    # 40C columns, then move the new 40C column to position 8.
    df[['Vapor P. @15C [kPa]', 'Vapor P. @40C [kPa]']] = df.apply(
        lambda row: split_pressure(row['Vapor P. @15C [kPa]']), axis=1, result_type="expand"
    )
    cols = list(df)
    cols.insert(8, cols.pop(cols.index('Vapor P. @40C [kPa]')))
    df = df.loc[:, cols]

    # Same treatment for the 'Crit. P.' cell: second value becomes the
    # critical-density column, placed at position 11.
    df[['Crit. P. [kPa]', 'Crit. Density [kg/m^3]']] = df.apply(
        lambda row: split_pressure(row['Crit. P. [kPa]']), axis=1, result_type="expand"
    )
    cols = list(df)
    cols.insert(11, cols.pop(cols.index('Crit. Density [kg/m^3]')))
    df = df.loc[:, cols]

    # '--' is the placeholder name given above to a column that is not kept.
    df = df.drop('--', axis=1)
    df_new1_parts.append(df)

# Stack the per-page tables into one frame and preview it.
df_new1 = pd.concat(df_new1_parts)
df_new1.head()

Unnamed: 0,Order,Compound,Formula,Molar Mass [g/mol],Boiling T. [K],Triple Point T. [K],Vapor P. @15C [kPa],Vapor P. @40C [kPa],Crit T. [K],Crit. P. [kPa],Crit. Density [kg/m^3],h
5,1.0,methane,CH4,16.0425,111.67,90.69,,35000.0,190.56,4599.0,162.66,0.0114
6,2.0,ethane,C2H6,30.069,184.57,90.37,3375.5,5500.0,305.32,4872.2,206.18,0.0995
7,3.0,propane,C3H8,44.0956,231.04,85.53,731.51,1369.4,369.89,4251.2,220.48,0.1521
8,4.0,isobutane,C4H10,58.1222,261.4,113.73,258.99,531.21,407.81,3629.0,225.5,0.1835
9,5.0,n-butane,C4H10,58.1222,272.66,134.9,176.15,378.49,425.13,3796.0,228.0,0.2008


In [12]:
# (rows, columns) of the table concatenated from pages 35-40.
df_new1.shape

(203, 12)

# Densities

In [13]:
# Pages 41 through 46 (np.arange's stop value, 47, is exclusive).
page_numbers = list(np.arange(41, 47, 1))
dfs = tabula.read_pdf(file, pages=page_numbers, stream=True)
# One parsed table per entry.
print(len(dfs))

6


In [14]:
# Names for the first eight columns tabula parsed from each page; any further
# parsed columns keep the names tabula gave them.
new_names = ['Order', 'Compound', '--', 'Liq. Relative Density @15C:1atm', 'API Gravity @15C:1atm',
             'T. Coef. of Density @15C:sat [1/K]', 'Ideal Gas Relative Density @15C:1atm',
             'Ideal Gas Density @15C:1atm[kg/m^3]']

# NOTE(review): 'Desntiy' is a typo for 'Density'. It is kept verbatim because
# the label becomes a column of the exported table and readers of those files
# may depend on the exact name -- rename deliberately, not silently.
liquid_density_col = 'Desntiy of Liquid @15C:1atm [kg/m^3]'

df_new2_parts = []
for df in dfs:
    df.columns = new_names + df.columns[len(new_names):].tolist()
    # Skip the first seven parsed rows (presumably header residue -- confirm
    # against the PDF). .copy() gives an independent frame, so the column
    # assignments below never write into a slice of the parsed table.
    df = df.iloc[7:, :].copy()

    # The 'API Gravity' cell can hold two values: the second one becomes the
    # liquid-density column, placed at position 5.
    df[['API Gravity @15C:1atm', liquid_density_col]] = df.apply(
        lambda row: split_pressure(row['API Gravity @15C:1atm']), axis=1, result_type="expand"
    )
    cols = list(df)
    cols.insert(5, cols.pop(cols.index(liquid_density_col)))
    df = df.loc[:, cols]

    # Same treatment for the ideal-gas relative density cell: the second value
    # becomes the relative-volume column, placed at position 8.
    df[['Ideal Gas Relative Density @15C:1atm', 'Ideal Gas Relative Volume @15C:1atm [m^3/kg]']] = df.apply(
        lambda row: split_pressure(row['Ideal Gas Relative Density @15C:1atm']), axis=1, result_type="expand"
    )
    cols = list(df)
    cols.insert(8, cols.pop(cols.index('Ideal Gas Relative Volume @15C:1atm [m^3/kg]')))
    df = df.loc[:, cols]

    # Drop the placeholder column and the extra parsed column named 'b,d,g'
    # (presumably footnote markers from the table header -- confirm).
    df = df.drop('--', axis=1)
    df = df.drop('b,d,g', axis=1)
    df_new2_parts.append(df)

# Stack the per-page tables into one frame and preview it.
df_new2 = pd.concat(df_new2_parts)
df_new2.head()

Unnamed: 0,Order,Compound,Liq. Relative Density @15C:1atm,API Gravity @15C:1atm,Desntiy of Liquid @15C:1atm [kg/m^3],T. Coef. of Density @15C:sat [1/K],Ideal Gas Relative Density @15C:1atm,Ideal Gas Relative Volume @15C:1atm [m^3/kg],Ideal Gas Density @15C:1atm[kg/m^3]
7,1.0,methane,0.3,340.0,300.0,,0.5539,1.4739,0.67848
8,2.0,ethane,0.35817,263.56,357.85,-0.01277,1.0382,0.7864,1.2717
9,3.0,propane,0.50796,147.07,507.5,-0.00299,1.5225,0.5362,1.8649
10,4.0,isobutane,0.56346,119.63,562.95,-0.00217,2.0068,0.4068,2.4581
11,5.0,n-butane,0.58478,110.47,584.25,-0.00194,2.0068,0.4068,2.4581


In [15]:
# (rows, columns) of the table concatenated from pages 41-46.
df_new2.shape

(203, 9)

# Summation Factor, Compressibility Factor (Z), Gross and Net Heating Values

In [16]:
# Pages 47 through 52 (np.arange's stop value, 53, is exclusive), restricted
# to a crop rectangle; relative_area=True is passed together with `area`.
page_numbers = list(np.arange(47, 53, 1))
crop_area = (24, 5, 92, 100)
dfs = tabula.read_pdf(file, pages=page_numbers, stream=True, area=crop_area, relative_area=True)
# One parsed table per entry.
print(len(dfs))

6


In [17]:
# Labels for the first eleven parsed columns of each heating-value page; any
# further parsed columns keep the names tabula gave them.
heating_value_columns = [
    'Order', 'Compound', 'Summation Factor: z=1-P*b^2 @15C [1/kPa^0.5]',
    'Summation Factor: z=1-P/P0*b^2, P0=101.325kPa @15C', 'Compressibility Factor @15C',
    'Gross Heating Value Liquid [MJ/kg]', 'Gross Heating Value Liquid [MJ/m^3]',
    'Gross Heating Value Ideal Gas [MJ/kg]', 'Gross Heating Value Ideal Gas [MJ/m^3]',
    'Net Heating Value Liquid [MJ/kg]', 'Net Heating Value Ideal Gas [MJ/m^3]',
]
named_count = len(heating_value_columns)

# Rename the columns of every parsed table in place; no rows are dropped and
# no cells are split for this section.
for page_table in dfs:
    page_table.columns = heating_value_columns + page_table.columns[named_count:].tolist()

# Stack the per-page tables into one frame and preview it.
df_new3 = pd.concat(dfs)
df_new3.head()

Unnamed: 0,Order,Compound,Summation Factor: z=1-P*b^2 @15C [1/kPa^0.5],"Summation Factor: z=1-P/P0*b^2, P0=101.325kPa @15C",Compressibility Factor @15C,Gross Heating Value Liquid [MJ/kg],Gross Heating Value Liquid [MJ/m^3],Gross Heating Value Ideal Gas [MJ/kg],Gross Heating Value Ideal Gas [MJ/m^3],Net Heating Value Liquid [MJ/kg],Net Heating Value Ideal Gas [MJ/m^3]
0,1,methane,0.00442,0.04453,,,,55.575,37.706,,33.95
1,2,ethane,0.0091,0.0916,0.5834,51.604,18467.0,51.951,66.066,47.172,60.43
2,3,propane,0.0132,0.1333,0.8514,49.99,25370.0,50.369,93.934,45.961,86.42
3,4,isobutane,0.0168,0.1695,0.9198,49.038,27606.0,49.388,121.4,45.216,112.0
4,5,n-butane,0.0179,0.1807,0.9394,49.166,28725.0,49.546,121.79,45.345,112.4


In [18]:
# (rows, columns) of the table concatenated from pages 47-52.
df_new3.shape

(203, 11)

# Air Required to Burn, Heat of Vaporization, Specific Heat, Refractive Index, Flammability Limits, Octane Numbers

In [133]:
# The same pages (53-58) are parsed twice with slightly different top crops:
# the first table is taken from the 26.5 crop, every later table from the 26 crop.
dfs0 = tabula.read_pdf(
    file, stream=True, pages=list(np.arange(53, 59, 1)),
    area=(26.5, 5, 92, 100), relative_area=True,
)
dfs1 = tabula.read_pdf(
    file, stream=True, pages=list(np.arange(53, 59, 1)),
    area=(26, 5, 92, 100), relative_area=True,
)[1:]
# Each table from the second parse carries a column named '-.3'; remove it.
dfs1 = [table.drop('-.3', axis=1) for table in dfs1]

dfs = [dfs0[0], *dfs1]

# Labels for the first thirteen parsed columns; any further parsed columns
# keep the names tabula gave them.
combustion_columns = [
    'Order', 'Compound', '--', 'Volume of Air Required to Burn One Vol. of Ideal Gas',
    'Heat of Vaporization @1atm [kJ/kg]',
    'Specific Heat Cp, ideal gas @15C [kJ/(kg*K)]',
    'Specific Heat Cv, ideal gas @15C [kJ/(kg*K)]',
    'Specific Heat Csat., liquid @15C [kJ/(kg*K)]',
    'k=Cp/Cv', 'Refractive Index, nD @15C',
    'Flammability Limits Lower @310.93K,1atm [volume % in air]',
    'Octane Number - Motor Method D-357', 'Octane Number - Research Method D-908',
]
air_volume_col = 'Volume of Air Required to Burn One Vol. of Ideal Gas'
lower_limit_col = 'Flammability Limits Lower @310.93K,1atm [volume % in air]'
upper_limit_col = 'Flammability Limits Upper @310.93K,1atm [volume % in air]'

page_frames = []
for page_idx, page_df in enumerate(dfs):
    page_df.columns = combustion_columns + page_df.columns[len(combustion_columns):].tolist()

    # The lower-limit cell can hold two values: the second one becomes the
    # upper-limit column, which is then moved to position 11.
    page_df[[lower_limit_col, upper_limit_col]] = page_df.apply(
        lambda row: split_pressure(row[lower_limit_col]), axis=1, result_type="expand"
    )
    ordered = list(page_df)
    upper_at = ordered.index(upper_limit_col)
    ordered.insert(11, ordered.pop(upper_at))
    page_df = page_df.loc[:, ordered]

    # On the table at index 4 the contents of '--' and the air-volume column
    # are exchanged (presumably tabula put the values in the neighbouring
    # column on that page -- confirm). Both columns are read before either is
    # written, and '--' is written first.
    if page_idx == 4:
        air_values, placeholder_values = page_df[air_volume_col], page_df['--']
        page_df['--'] = air_values
        page_df[air_volume_col] = placeholder_values

    page_df = page_df.drop('--', axis=1)
    page_frames.append(page_df)

# Stack the per-page tables into one frame.
df_new4 = pd.concat(page_frames)

In [134]:
# Preview the first five rows of the table concatenated from pages 53-58.
df_new4.head()

Unnamed: 0,Order,Compound,Volume of Air Required to Burn One Vol. of Ideal Gas,Heat of Vaporization @1atm [kJ/kg],"Specific Heat Cp, ideal gas @15C [kJ/(kg*K)]","Specific Heat Cv, ideal gas @15C [kJ/(kg*K)]","Specific Heat Csat., liquid @15C [kJ/(kg*K)]",k=Cp/Cv,"Refractive Index, nD @15C","Flammability Limits Lower @310.93K,1atm [volume % in air]","Flammability Limits Upper @310.93K,1atm [volume % in air]",Octane Number - Motor Method D-357,Octane Number - Research Method D-908
0,1,methane,9.552,510.8,2.2036,1.6853,,1.3075,1.00042,5.0,15.0,,
1,2,ethane,16.715,489.4,1.7054,1.4289,4.005,1.1935,1.00072,2.9,13.0,0.05,1.6
2,3,propane,23.879,425.6,1.6191,1.4305,2.584,1.1318,1.00103,2.0,9.5,97.1,1.8
3,4,isobutane,31.043,365.1,1.6162,1.4731,2.3586,1.0971,1.00123,1.8,8.5,97.6,0.1
4,5,n-butane,31.043,385.7,1.6511,1.508,2.3805,1.0949,1.00133,1.5,9.0,89.6,93.8


In [135]:
# (rows, columns) of the table concatenated from pages 53-58.
df_new4.shape

(203, 13)

# Merge

In [136]:
from functools import reduce


# The four per-section tables, in the order they are merged below.
# Note: this rebinds `dfs`, which earlier cells used for the raw tabula output.
dfs = [df_new1, df_new2, df_new3, df_new4]

def drop_compound_if_not_first(df, first=True):
    """Return ``df`` itself when ``first`` is truthy, otherwise a new frame
    without the 'Compound' column.

    Used so that only one of the tables being merged contributes a
    'Compound' column.
    """
    return df if first else df.drop('Compound', axis=1)

# Keep 'Compound' only in the first table so the merge yields a single
# 'Compound' column.
dfs = [drop_compound_if_not_first(table, first=(position == 0)) for position, table in enumerate(dfs)]

# Fold the tables left to right with an outer join on 'Order'.
merged_df = dfs[0]
for right_table in dfs[1:]:
    merged_df = merged_df.merge(right_table, on='Order', how='outer')

In [137]:
# (rows, columns) of the merged table. Because the merge is an outer join, a row
# count above that of the per-section tables would indicate unmatched 'Order' keys.
merged_df.shape

(203, 39)

In [138]:
# Preview the first five rows of the merged table.
merged_df.head()

Unnamed: 0,Order,Compound,Formula,Molar Mass [g/mol],Boiling T. [K],Triple Point T. [K],Vapor P. @15C [kPa],Vapor P. @40C [kPa],Crit T. [K],Crit. P. [kPa],Crit. Density [kg/m^3],h,Liq. Relative Density @15C:1atm,API Gravity @15C:1atm,Desntiy of Liquid @15C:1atm [kg/m^3],T. Coef. of Density @15C:sat [1/K],Ideal Gas Relative Density @15C:1atm,Ideal Gas Relative Volume @15C:1atm [m^3/kg],Ideal Gas Density @15C:1atm[kg/m^3],Summation Factor: z=1-P*b^2 @15C [1/kPa^0.5],"Summation Factor: z=1-P/P0*b^2, P0=101.325kPa @15C",Compressibility Factor @15C,Gross Heating Value Liquid [MJ/kg],Gross Heating Value Liquid [MJ/m^3],Gross Heating Value Ideal Gas [MJ/kg],Gross Heating Value Ideal Gas [MJ/m^3],Net Heating Value Liquid [MJ/kg],Net Heating Value Ideal Gas [MJ/m^3],Volume of Air Required to Burn One Vol. of Ideal Gas,Heat of Vaporization @1atm [kJ/kg],"Specific Heat Cp, ideal gas @15C [kJ/(kg*K)]","Specific Heat Cv, ideal gas @15C [kJ/(kg*K)]","Specific Heat Csat., liquid @15C [kJ/(kg*K)]",k=Cp/Cv,"Refractive Index, nD @15C","Flammability Limits Lower @310.93K,1atm [volume % in air]","Flammability Limits Upper @310.93K,1atm [volume % in air]",Octane Number - Motor Method D-357,Octane Number - Research Method D-908
0,1.0,methane,CH4,16.0425,111.67,90.69,,35000.0,190.56,4599.0,162.66,0.0114,0.3,340.0,300.0,,0.5539,1.4739,0.67848,0.00442,0.04453,,,,55.575,37.706,,33.95,9.552,510.8,2.2036,1.6853,,1.3075,1.00042,5.0,15.0,,
1,2.0,ethane,C2H6,30.069,184.57,90.37,3375.5,5500.0,305.32,4872.2,206.18,0.0995,0.35817,263.56,357.85,-0.01277,1.0382,0.7864,1.2717,0.0091,0.0916,0.5834,51.604,18467.0,51.951,66.066,47.172,60.43,16.715,489.4,1.7054,1.4289,4.005,1.1935,1.00072,2.9,13.0,0.05,1.6
2,3.0,propane,C3H8,44.0956,231.04,85.53,731.51,1369.4,369.89,4251.2,220.48,0.1521,0.50796,147.07,507.5,-0.00299,1.5225,0.5362,1.8649,0.0132,0.1333,0.8514,49.99,25370.0,50.369,93.934,45.961,86.42,23.879,425.6,1.6191,1.4305,2.584,1.1318,1.00103,2.0,9.5,97.1,1.8
3,4.0,isobutane,C4H10,58.1222,261.4,113.73,258.99,531.21,407.81,3629.0,225.5,0.1835,0.56346,119.63,562.95,-0.00217,2.0068,0.4068,2.4581,0.0168,0.1695,0.9198,49.038,27606.0,49.388,121.4,45.216,112.0,31.043,365.1,1.6162,1.4731,2.3586,1.0971,1.00123,1.8,8.5,97.6,0.1
4,5.0,n-butane,C4H10,58.1222,272.66,134.9,176.15,378.49,425.13,3796.0,228.0,0.2008,0.58478,110.47,584.25,-0.00194,2.0068,0.4068,2.4581,0.0179,0.1807,0.9394,49.166,28725.0,49.546,121.79,45.345,112.4,31.043,385.7,1.6511,1.508,2.3805,1.0949,1.00133,1.5,9.0,89.6,93.8


# Matching GPA Compounds to CAS Numbers

In [156]:
# CAS numbers entered by hand, used only for compound names whose lookup
# through ChemicalConstantsPackage raises an exception.
manual_cas = {
    'n-heptane': '142-82-5',
    '(Z)-2-pentene': '627-20-3',
    '1,trans-2,cis-4-trimethylcyclopentane': '16883-48-0',
    'r-1,t-2,c-3-trimethylcyclopentane': '15890-40-1',
    '1,cis-2,trans-4-trimethylcyclopentane': '4850-28-6',
    'r-1,c-2,t-3-trimethylcyclopentane': '19374-46-0',
    '1,cis-2,cis-3-trimethylcyclopentane': '2613-69-6',
    'r-1,c-3,c-5-trimethylcyclohexane': '1795-27-3',
    'r-1,c-3,t-5-trimethylcyclohexane': '1795-26-2',
    'cis-1,2,trans-1,3-cyclohexane': '7667-55-2',
    'r-1,t-2,c-3-trimethylcyclohexane': '1678-81-5',
    'cis,cis-1,2,3-trimethylcyclohexane': '1678-97-3',
    'cis-1,2,cis-1,4-1,2,4-trimethylcyclohexane': '2234-75-5',
    'n-undecane': '1120-21-4',
}

compounds = merged_df['Compound'].values

# Map every compound name to a CAS string. Names that cannot be resolved
# automatically fall back to manual_cas, and to '' when no manual entry exists.
# NOTE(review): several compounds can therefore share the '' value, so CAS is
# not guaranteed to be a unique key in the result.
comp_dict = {comp: None for comp in compounds}
for comp in compounds:
    try:
        constants = ChemicalConstantsPackage.constants_from_IDs([comp])
        CAS = constants.CASs
    except Exception:
        # `except Exception` rather than a bare `except` so that interrupting
        # the kernel (KeyboardInterrupt) still stops this loop.
        CAS = [manual_cas.get(comp, '')]
        # Report every name that needed the fallback, with the CAS used.
        print("'%s': '%s'" % (comp, CAS[0]))
    comp_dict[comp] = CAS[0]

# delete compounds that I wasn't able to identify their CAS numbers. Mostly complex trimethylcyclohexane compounds
#comp_dict = {k: v for k, v in comp_dict.items() if v != ""}

#del comp_dict['trans-1,4-dimethylcyclohexane']
#del comp_dict['trans-1,3-dimethylcyclohexane']
#del comp_dict['trans-1-ethyl-3-methylcyclopentane']


'n-heptane': '142-82-5'
'(Z)-2-pentene': '627-20-3'
'1,trans-2,cis-4-trimethylcyclopentane': '16883-48-0'
'r-1,t-2,c-3-trimethylcyclopentane': '15890-40-1'
'1,cis-2,trans-4-trimethylcyclopentane': '4850-28-6'
'r-1,c-2,t-3-trimethylcyclopentane': '19374-46-0'
'1,cis-2,cis-3-trimethylcyclopentane': '2613-69-6'
'r-1,c-3,c-5-trimethylcyclohexane': '1795-27-3'
'r-1,c-3,t-5-trimethylcyclohexane': '1795-26-2'
'trans,trans-1,2,4-trimethylcyclohexane': ''
'cis-1,2,trans-1,4-1,2,4-trimethylcyclohexane': ''
'r-1,t-2,c-3-trimethylcyclohexane': '1678-81-5'
'r-1,t-2,t-4-trimethylcyclohexane': ''
'cis-1,2,cis-1,4-1,2,4-trimethylcyclohexane': '2234-75-5'
'cis,cis-1,2,3-trimethylcyclohexane': '1678-97-3'
'cis-1,2,trans-1,3-cyclohexane': '7667-55-2'


In [157]:
# Number of distinct compound names used as keys in comp_dict.
len(set(comp_dict.keys()))

203

In [158]:
# Number of distinct CAS strings. A count below the number of keys means that
# several compounds share one value (the '' placeholder counts as one value).
len(set(comp_dict.values()))

198

In [159]:
# Two-column lookup table (compound name, CAS string) built from comp_dict.
comp_df = pd.DataFrame(list(comp_dict.items()), columns=['Compound', 'CAS'])

# Attach the CAS column to the merged property table by compound name.
final = merged_df.merge(comp_df, how='inner', on='Compound')

# Move 'CAS' to column position 2; all other columns keep their relative order.
cols = [name for name in final.columns if name != 'CAS']
cols.insert(2, 'CAS')
final = final.loc[:, cols]

In [160]:
# 'CAS' is already at column position 2 after the previous cell, so the
# re-ordering that used to be repeated here was a no-op and has been removed.
# Preview the first ten rows instead of dumping the entire table.
final.head(10)

Unnamed: 0,Order,Compound,CAS,Formula,Molar Mass [g/mol],Boiling T. [K],Triple Point T. [K],Vapor P. @15C [kPa],Vapor P. @40C [kPa],Crit T. [K],Crit. P. [kPa],Crit. Density [kg/m^3],h,Liq. Relative Density @15C:1atm,API Gravity @15C:1atm,Desntiy of Liquid @15C:1atm [kg/m^3],T. Coef. of Density @15C:sat [1/K],Ideal Gas Relative Density @15C:1atm,Ideal Gas Relative Volume @15C:1atm [m^3/kg],Ideal Gas Density @15C:1atm[kg/m^3],Summation Factor: z=1-P*b^2 @15C [1/kPa^0.5],"Summation Factor: z=1-P/P0*b^2, P0=101.325kPa @15C",Compressibility Factor @15C,Gross Heating Value Liquid [MJ/kg],Gross Heating Value Liquid [MJ/m^3],Gross Heating Value Ideal Gas [MJ/kg],Gross Heating Value Ideal Gas [MJ/m^3],Net Heating Value Liquid [MJ/kg],Net Heating Value Ideal Gas [MJ/m^3],Volume of Air Required to Burn One Vol. of Ideal Gas,Heat of Vaporization @1atm [kJ/kg],"Specific Heat Cp, ideal gas @15C [kJ/(kg*K)]","Specific Heat Cv, ideal gas @15C [kJ/(kg*K)]","Specific Heat Csat., liquid @15C [kJ/(kg*K)]",k=Cp/Cv,"Refractive Index, nD @15C","Flammability Limits Lower @310.93K,1atm [volume % in air]","Flammability Limits Upper @310.93K,1atm [volume % in air]",Octane Number - Motor Method D-357,Octane Number - Research Method D-908
0,1.0,methane,74-82-8,CH4,16.0425,111.67,90.69,,35000.0,190.56,4599.0,162.66,0.0114,0.3,340.0,300.0,,0.5539,1.4739,0.67848,0.00442,0.04453,,,,55.575,37.706,,33.95,9.552,510.8,2.2036,1.6853,,1.3075,1.00042,5.0,15.0,,
1,2.0,ethane,74-84-0,C2H6,30.069,184.57,90.37,3375.5,5500.0,305.32,4872.2,206.18,0.0995,0.35817,263.56,357.85,-0.01277,1.0382,0.7864,1.2717,0.0091,0.0916,0.5834,51.604,18467.0,51.951,66.066,47.172,60.43,16.715,489.4,1.7054,1.4289,4.005,1.1935,1.00072,2.9,13.0,0.05,1.6
2,3.0,propane,74-98-6,C3H8,44.0956,231.04,85.53,731.51,1369.4,369.89,4251.2,220.48,0.1521,0.50796,147.07,507.5,-0.00299,1.5225,0.5362,1.8649,0.0132,0.1333,0.8514,49.99,25370.0,50.369,93.934,45.961,86.42,23.879,425.6,1.6191,1.4305,2.584,1.1318,1.00103,2.0,9.5,97.1,1.8
3,4.0,isobutane,75-28-5,C4H10,58.1222,261.4,113.73,258.99,531.21,407.81,3629.0,225.5,0.1835,0.56346,119.63,562.95,-0.00217,2.0068,0.4068,2.4581,0.0168,0.1695,0.9198,49.038,27606.0,49.388,121.4,45.216,112.0,31.043,365.1,1.6162,1.4731,2.3586,1.0971,1.00123,1.8,8.5,97.6,0.1
4,5.0,n-butane,106-97-8,C4H10,58.1222,272.66,134.9,176.15,378.49,425.13,3796.0,228.0,0.2008,0.58478,110.47,584.25,-0.00194,2.0068,0.4068,2.4581,0.0179,0.1807,0.9394,49.166,28725.0,49.546,121.79,45.345,112.4,31.043,385.7,1.6511,1.508,2.3805,1.0949,1.00133,1.5,9.0,89.6,93.8
5,6.0,isopentane,78-78-4,C5H12,72.1488,300.98,112.65,63.57,151.51,460.35,3378.0,236.0,0.2274,0.62564,94.668,625.08,-0.0016,2.4911,0.3277,3.0514,0.022,0.2214,0.9682,48.593,30375.0,48.95,149.36,44.898,138.1,38.206,343.3,1.6003,1.4851,2.2208,1.0776,1.35669,1.3,8.0,90.3,92.3
6,7.0,n-pentane,109-66-0,C5H12,72.1488,309.21,143.47,46.45,115.67,469.7,3370.0,232.0,0.2515,0.63119,92.68,630.62,-0.00154,2.4911,0.3277,3.0514,0.0232,0.233,0.9744,48.668,30691.0,49.045,149.65,44.974,138.4,38.206,357.6,1.6216,1.5064,2.2698,1.0765,1.3601,1.4,8.3,62.6,61.7
7,8.0,n-hexane,110-54-3,C6H14,86.1754,341.86,177.83,12.86,37.292,507.5,3012.0,233.18,0.2986,0.66452,81.436,663.92,-0.00137,2.9754,0.2744,3.6446,0.0296,0.2984,0.9886,48.342,32095.0,48.715,177.55,44.735,164.39,45.37,334.9,1.6125,1.516,2.2104,1.0636,1.37698,1.1,7.7,26.0,24.8
8,9.0,n-heptane,142-82-5,C7H16,100.2019,371.53,182.55,3.622,12.326,540.13,2736.0,232.0,0.3494,0.68864,73.977,688.02,-0.00122,3.4597,0.236,4.2378,0.0364,0.366,0.9952,48.103,33096.0,48.474,205.42,44.555,190.39,52.534,316.9,1.6057,1.5227,2.2038,1.0545,1.39024,1.0,7.0,,
9,10.0,n-octane,111-65-9,C8H18,114.2285,398.77,216.37,1.03,4.1263,568.83,2487.0,234.9,0.3971,0.70693,68.661,706.3,-0.00114,3.944,0.207,4.831,0.0431,0.4343,0.9981,47.919,33845.0,48.29,233.29,44.418,216.38,59.697,302.2,1.6008,1.528,2.1918,1.0476,1.39987,0.8,6.5,,


In [161]:
# Number of distinct compound names in the final table.
len(set(final['Compound']))

203

In [162]:
# Number of distinct CAS strings in the final table; fewer than the number of
# compounds means some rows share a CAS value.
len(set(final['CAS']))

198

In [163]:
# Base file name (without extension) shared by all exported files.
export_name = 'GPA 2145-16 Compound Properties Table - Metric'
pickle_path = export_name + ".pkl"
excel_path = export_name + ".xlsx"

# CSV export is currently disabled:
#final.to_csv('%s.csv' % export_name, index=False)

# Write the final table as a pickle and as an Excel workbook (without the index).
final.to_pickle(pickle_path)
final.to_excel(excel_path, index=False)