In [1]:
import numpy as np
import pandas as pd

In [2]:
#!/usr/bin/python
# Procedure to read in and check refractive index data file (filename)
# File should be correctly formatted with filename extension .ri
# Created 2017-07-27 by OK

import os

class ReadError(Exception):
    def __init__(self, value):
        self.parameter = value
    def __str__(self):
        return repr(self.parameter)

def read_ri(filename):
    expected_header_names = ['FORMAT', 'DESCRIPTION', 'DISTRIBUTEDBY', 'SUBSTANCE', 'SAMPLEFORM', 'TEMPERATURE', 'CONCENTRATION', 'REFERENCE', 'DOI', 'SOURCE', 'CONTACT', 'COMMENT']
    expected_column_names = ['wavl', 'wavn', 'n', 'dn', 'k', 'dk']
    with open(filename, 'r') as f:
        t = f.readlines()
        t = [x.strip() for x in t] # strips whitespace from beginning and end of lines
        
        out = {'header':{},'data':{}}
        
        header_lines = 0
        data_lines = 0
        for line in t:
            if line[0] == '#':
                header_lines += 1
                if data_lines > 0:
                    raise ReadError('Incorrectly formatted file (%s): Header not contiguous.' %filename)
            else:
                data_lines += 1
        for i in range(1,data_lines): # ignore blank lines at end of file
            if any(char.isdigit() for char in t[-i]):
                break
            else:
                data_lines -= 1
        
        if header_lines == 0:
            raise ReadError('Incorrectly formatted file (%s): No header.' %filename)
        if data_lines == 0:
            raise ReadError('Incorrectly formatted file (%s): No data.' %filename)
    
        for i in range(header_lines):
            line = t[i][1:] # strip leading '#'
            if line[0] != '#':
                tag_name = line.split('=',1)[0].strip().upper()
                if tag_name not in expected_header_names:
                    print('Unknown header tag "%s", so ignored (file: %s)' %(tag_name, filename) )
                    continue
                try: # ensure tag content is encoded consistently
#                     tag_content = line.split('=',1)[1].strip().decode('utf8').encode('utf8', 'xmlcharrefreplace')
                    tag_content = line.split('=',1)[1].strip()

                except UnicodeDecodeError as e:
#                     tag_content = line.split('=',1)[1].strip().decode("iso-8859-1").encode('utf8', 'xmlcharrefreplace')
                    tag_content = line.split('=',1)[1].strip()

                if tag_name in out['header']:
                    tag_content = out['header'][tag_name]+' '+tag_content
                out['header'][tag_name] = tag_content
            elif tag_name in expected_header_names:
                tag_content = line[1:].strip()
                out['header'][tag_name] = out['header'][tag_name]+' '+tag_content
        
        if 'FORMAT' not in out['header']:
            raise ReadError('Incorrectly formatted file (%s): No FORMAT tag in header.' %filename)
        
        column_labels = out['header']['FORMAT'].split()
        column_labels = [x.strip().lower() for x in column_labels]
        for cl in column_labels:
            if cl not in expected_column_names:
                print( 'Unknown column name "%s", so ignored (file: %s)' %(cl, filename) )
                continue
            out['data'][cl] = []
        for l in range(header_lines, data_lines):
            line = t[l].split()
            line = [x.strip() for x in line]
            for c in range(len(column_labels)):
                if column_labels[c] in expected_column_names:
                    out['data'][column_labels[c]].append(float(line[c]))
        
        # add wavl & wavn columns if needed (wavl in micro-m, wavn in cm-1)
        if 'wavn' not in out['data']:
            out['data']['wavn'] = [float(10000)/x if x!=0 else float('nan') for x in out['data']['wavl']]
        if 'wavl' not in out['data']:
            out['data']['wavl'] = [float(10000)/x if x!=0 else float('nan') for x in out['data']['wavn']]
        
        col_lengths = []
        for col in out['data']:
            col_lengths.append(len(out['data'][col]))
        if len(set(col_lengths)) > 1:
            raise ReadError('Incorrectly formatted file (%s): Data columns have different lengths.' %filename)
    return out



In [3]:
os.system('ls *.ri >& ri.names')


0

In [4]:
with open('ri.names', 'r') as f:
    names = f.readlines()
    i=0
    for name in names:

        RI = read_ri("./"+name.rstrip('\n'))
    
        wt = np.zeros(len(RI['data']["wavn"]))
        wt[:] = float(RI['header']["CONCENTRATION"][0:2])

        temp = np.zeros(len(RI['data']["wavn"]))
        temp[:] = float(RI['header']["TEMPERATURE"][0:3])

        data = RI['data']
        data['wt_%'] = wt
        data['temp_K'] = temp
        data["wavn_cm-1"] = data.pop("wavn")
        data["wavl_micro-m"] = data.pop("wavl")
        data["wavl_nm"] = data["wavl_micro-m"]


        df_new = pd.DataFrame.from_dict(data)
        df_new.drop(columns=['wavl_micro-m'])
        df_new["wavl_nm"] = df_new["wavl_nm"]*1000
        
        if i==0:
            df_total = df_new
        else:
            df_total = pd.concat([df_total, df_new])
        i=i+1
        
# make the row index from 0 to the length of df_total
index = np.arange(len(df_total))
df_total.set_axis(index, axis=0)

df_total.to_csv('Fractive_index_LUT.csv')

In [5]:
df_total

Unnamed: 0,n,k,wt_%,temp_K,wavn_cm-1,wavl_micro-m,wavl_nm
0,1.33259,0.00000,10.0,230.0,6000.00000,1.666667,1666.666667
1,1.33256,0.00000,10.0,230.0,5996.00000,1.667779,1667.778519
2,1.33253,0.00000,10.0,230.0,5992.00000,1.668892,1668.891856
3,1.33250,0.00000,10.0,230.0,5988.00000,1.670007,1670.006680
4,1.33246,0.00000,10.0,230.0,5984.00000,1.671123,1671.122995
...,...,...,...,...,...,...,...
4719,1.79875,0.38136,81.0,298.0,419.49320,23.838289,23838.288678
4720,1.79969,0.37794,81.0,298.0,417.99308,23.923841,23923.841036
4721,1.80034,0.37607,81.0,298.0,416.49297,24.010009,24010.009101
4722,1.80299,0.37510,81.0,298.0,414.99286,24.096800,24096.800123


## select temp that is closes to 240 K, so that only one temp in one spesific wt_%

In [6]:
wt_names = df_total["wt_%"].unique()

wt_names

array([10., 15., 20., 25., 30., 33., 36., 37., 38., 48., 58., 65., 72.,
       76., 81.])

# Stop here!

In [84]:
with open('ri.names', 'r') as f:
    names = f.readlines()
    for line in names:
        print(line)
    
    
    

RI = read_ri("./H2SO4_38_213K_Myhre_2003.ri")

wt = np.zeros(len(RI['data']["wavn"]))
wt[:] = float(RI['header']["CONCENTRATION"][0:2])

temp = np.zeros(len(RI['data']["wavn"]))
temp[:] = float(RI['header']["TEMPERATURE"][0:3])

data = RI['data']
data['wt_%'] = wt
data['temp_K'] = temp
data["wavn_cm-1"] = data.pop("wavn")
data["wavl_micro-m"] = data.pop("wavl")
data["wavl_nm"] = data["wavl_micro-m"]


df_new = pd.DataFrame.from_dict(data)
df_new.drop(columns=['wavl_micro-m'])
df_new["wavl_nm"] = df_new["wavl_nm"]*1000

df_new = df_new.loc[:, ['wt_%','temp_K',"wavn_cm-1","wavl_nm","n","k"]]

In [85]:
df_new

Unnamed: 0,wt_%,temp_K,wavn_cm-1,wavl_nm,n,k
0,38.0,213.0,7498.52083,1333.596349,1.39033,0.01410
1,38.0,213.0,7497.02072,1333.863194,1.39039,0.01411
2,38.0,213.0,7495.52061,1334.130145,1.39047,0.01412
3,38.0,213.0,7494.02050,1334.397204,1.39053,0.01411
4,38.0,213.0,7492.52039,1334.664369,1.39058,0.01409
...,...,...,...,...,...,...
4719,38.0,213.0,419.49320,23838.288678,1.76110,0.34746
4720,38.0,213.0,417.99308,23923.841036,1.76061,0.34757
4721,38.0,213.0,416.49297,24010.009101,1.76032,0.34774
4722,38.0,213.0,414.99286,24096.800123,1.76014,0.34787


In [87]:
df_new['temp_K'].max(),df_new['temp_K'].min()

(213.0, 213.0)

In [88]:
df_new['wavl_nm'].max(),df_new['wavl_nm'].min()

(24184.22088416302, 1333.5963487614931)

In [27]:
RI.keys(), RI['header'].keys(), RI['data'].keys()


(dict_keys(['header', 'data']),
 dict_keys(['SUBSTANCE', 'DESCRIPTION', 'TEMPERATURE', 'CONCENTRATION', 'DOI', 'REFERENCE', 'FORMAT']),
 dict_keys(['wavn', 'n', 'k', 'wavl']))

In [65]:
RI['header']

{'SUBSTANCE': 'Sulphuric acid',
 'DESCRIPTION': 'Refractive indices of 38 wt% sulphuric acid from 400 to 7500 cm-1 at 213 K',
 'TEMPERATURE': '213 K',
 'CONCENTRATION': '38 wt%',
 'DOI': '10.1021/jp026576n',
 'REFERENCE': 'Spectroscopic Study of Aqueous H2SO4 at Different Temperatures and Compositions: Variations in Dissociation and Optical Properties Cathrine E. Lund Myhre, Daniel H. Christensen, Flemming M. Nicolaisen and Claus J. Nielsen, The Journal of Physical Chemistry A 2003 107 (12), 1979-1991',
 'FORMAT': 'WAVN N K'}

In [55]:
wt = np.zeros(len(RI['data']["wavn"]))
wt[:] = float(RI['header']["CONCENTRATION"][0:2])

temp = np.zeros(len(RI['data']["wavn"]))
temp[:] = float(RI['header']["TEMPERATURE"][0:3])

wt, temp

(array([38., 38., 38., ..., 38., 38., 38.]),
 array([213., 213., 213., ..., 213., 213., 213.]))

In [58]:
data = RI['data']

len(RI['data']["wavn"]), len(RI['data']["n"]), len(RI['data']["k"]), len(RI['data']["wavl"])


(4724, 4724, 4724, 4724)

In [69]:
data['wt_%'] = wt
data['temp_K'] = temp
data["wavn_cm-1"] = data.pop("wavn")
data["wavl_micro-m"] = data.pop("wavl")

In [61]:
data.keys()

dict_keys(['wavn', 'n', 'k', 'wavl', 'wt', 'temp'])

In [63]:
new = pd.DataFrame.from_dict(data)

In [64]:
new

Unnamed: 0,wavn,n,k,wavl,wt,temp
0,7498.52083,1.39033,0.01410,1.333596,38.0,213.0
1,7497.02072,1.39039,0.01411,1.333863,38.0,213.0
2,7495.52061,1.39047,0.01412,1.334130,38.0,213.0
3,7494.02050,1.39053,0.01411,1.334397,38.0,213.0
4,7492.52039,1.39058,0.01409,1.334664,38.0,213.0
...,...,...,...,...,...,...
4719,419.49320,1.76110,0.34746,23.838289,38.0,213.0
4720,417.99308,1.76061,0.34757,23.923841,38.0,213.0
4721,416.49297,1.76032,0.34774,24.010009,38.0,213.0
4722,414.99286,1.76014,0.34787,24.096800,38.0,213.0
