# Parsing NASA Polynomials

### Read the raw NASA polynomial dataset, parse it, and store the data in an .xml file.

In [7]:
"""String methods."""

def chunkstring(string, length):
    """Yield consecutive slices of `string`, each at most `length` items long.

    The final slice is shorter when len(string) is not a multiple of `length`.
    The result is a lazy generator, not a list.
    """
    # The range is built up front so an invalid `length` fails at call time.
    starts = range(0, len(string), length)
    return (string[begin:begin + length] for begin in starts)

def make_intervals(alist):
    """Create consecutive [low, high] intervals from a collection of values.

    The values are sorted in ascending order first, so the input does not
    need to be ordered. The input itself is left untouched (a sorted copy is
    used), which means any iterable is accepted, not only lists.

    Parameters
    ----------
    alist : iterable
        Values that can be compared with each other.

    Returns
    -------
    list of [low, high] lists
        One entry per pair of neighbouring sorted values; empty when fewer
        than two values are given.
    """
    ordered = sorted(alist)
    # Pair each value with its successor: (v0, v1), (v1, v2), ...
    return [[low, high] for low, high in zip(ordered, ordered[1:])]

In [8]:
"""NASA Polynomial Parser."""

with open("thermo_all.txt", "r") as f:
    data = f.readlines()

# Input params
num_cf = 7       # coefficients stored per temperature interval
cf_width = 15    # width, in characters, of one coefficient field
skip_rows = 5    # header lines to drop before the first record

# Output structures
temp_vals = {}       # specie -> list of [t_low, t_high] intervals
coefficients = {}    # specie -> {lower bound of interval: coefficient strings}
cf = []              # coefficient fields gathered for the specie being read

# Skip header
data = data[skip_rows:]

# Loop over each line in file
for line in data:

    text = line.rstrip()
    if not text:
        continue

    # The last character of a record line is its line index: a line ending
    # in 1 opens a specie record and a line ending in 4 closes it. The first
    # non-empty line that does not end in a digit stops the parse.
    try:
        index = int(text[-1])
    except ValueError:
        break

    if index == 1:

        # Get specie and temperature data
        s = text.split()
        specie = s[0]
        temp = [float(k) for k in s[-4:-1]]
        temp_vals[specie] = make_intervals(temp)

        # Start every specie with an empty buffer so that fields left over
        # from the previous record cannot shift this one's coefficients.
        cf = []

    else:

        # Append coefficient data (fixed-width fields, index digit removed)
        s = text[:-1].rstrip()
        cf.extend(chunkstring(s, cf_width))

    if index == 4:
        # Coefficient groups are assigned from the highest interval down;
        # each group is keyed by the lower bound of its interval.
        bounds = sorted(temp, reverse=True)
        d = {}
        for i, t in enumerate(bounds[1:]):
            d[t] = cf[i * num_cf:(i + 1) * num_cf]
        coefficients[specie] = d


species_list = list(coefficients.keys())
print("Number of species imported: {}".format(len(species_list)))

Number of species imported: 53


In [10]:
"""Create XML file."""

from lxml import etree
from lxml.etree import Element, SubElement, Comment

# Root
root = Element('ctml')

comment = Comment("phase gri30")
root.append(comment)

# Phase
phase = SubElement(root, 'phase', id="gri30")
speciesArray = SubElement(phase, 'speciesArray')
# datasrc is a '#'-prefixed reference to the id of the speciesData element
speciesArray.set('datasrc', '#species_data')
speciesArray.text = ' '.join(species_list)

# Species data
comment = Comment("species definitions")
root.append(comment)
# The id itself carries no '#'; only the reference in datasrc does.
speciesData = SubElement(root, 'speciesData', id="species_data")

for k, v in coefficients.items():
    speciesData.append(Comment("species " + str(k)))

    # Species
    species = SubElement(speciesData, 'species', name=k)
    thermo = SubElement(species, 'thermo')

    # One NASA element per temperature interval of this specie
    for t_min, t_max in temp_vals[k]:

        # Temperature intervals
        NS = SubElement(thermo, 'NASA')
        NS.set('Tmax', str(t_max))
        NS.set('Tmin', str(t_min))
        NS.set('P0', '100000.0')

        # Coefficients are stored under the lower bound of their interval
        coeffs = v[t_min]
        floats = '\n' + ' '*12 + ', '.join(coeffs) + '\n' + ' '*10
        # Declare the size actually written instead of a hard-coded "7"
        f = SubElement(NS, 'floatArray', name="Coeffs", size=str(len(coeffs)))
        f.text = floats


xml = etree.ElementTree(root)
xml.write("thermo_out.xml", pretty_print=True)

### Read the NASA polynomial dataset in .xml format and create a SQL database

In [14]:
# Create database and tables

import sqlite3

db = sqlite3.connect('thermo.sqlite')
cursor = db.cursor()

# LOW and HIGH hold the same columns, so a single schema template is used
# for both; this keeps the two tables from drifting apart.
table_schema = '''CREATE TABLE {} (
               SPECIES_NAME TEXT PRIMARY KEY NOT NULL,
               TLOW INT NOT NULL,
               THIGH INT NOT NULL,
               COEFF_1 FLOAT,
               COEFF_2 FLOAT,
               COEFF_3 FLOAT,
               COEFF_4 FLOAT,
               COEFF_5 FLOAT,
               COEFF_6 FLOAT,
               COEFF_7 FLOAT)'''

# Drop any table left by a previous run so the cell can be re-executed.
for table_name in ("LOW", "HIGH"):
    cursor.execute("DROP TABLE IF EXISTS " + table_name)
    cursor.execute(table_schema.format(table_name))

In [15]:
# Import XML and populate database

import xml.etree.ElementTree as ET

tree = ET.parse('thermo_out.xml')
thermo = tree.getroot()

# Ten placeholders: specie name, Tmin, Tmax and the coefficient values
insert_template = "INSERT INTO {} VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"

for specie in thermo.find('speciesData').findall('species'):

    # Every (NASA element, floatArray) pair of this specie, in document order
    arrays = [
        (t_range, coef)
        for t_range in specie.find('thermo').findall('NASA')
        for coef in t_range.findall('floatArray')
    ]

    for position, (t_range, coef) in enumerate(arrays):

        # Specie name, temperature range, then the comma-separated coefficients
        vals_insert = [
            specie.attrib['name'],
            t_range.attrib['Tmin'],
            t_range.attrib['Tmax'],
        ]
        vals_insert.extend(field.strip() for field in coef.text.split(','))

        # The first array of a specie goes to LOW, every later one to HIGH
        tbl = "HIGH" if position else "LOW"

        cursor.execute(insert_template.format(tbl), vals_insert)

In [16]:
from IPython.display import display
import pandas as pd

def viz_tables(cols, query):
    """Run `query` on the notebook-level `cursor` and display the rows.

    Parameters
    ----------
    cols : iterable of str
        Column labels for the displayed frame, in order. Only the first
        len(cols) values of every result row are shown.
    query : str
        SQL statement passed to `cursor.execute`.

    Returns
    -------
    The return value of `IPython.display.display` for the resulting frame.
    """
    labels = list(cols)
    rows = cursor.execute(query).fetchall()
    # DataFrame.from_items was removed in pandas 1.0; building the frame from
    # the row tuples gives the same columns in the same order.
    frame = pd.DataFrame([row[:len(labels)] for row in rows], columns=labels)
    return display(frame)

# Column names are read from the LOW table's metadata and used for both
# tables, which are created with the same columns.
thermo_cols = [info[1] for info in cursor.execute("PRAGMA table_info(LOW)")]

for table_name in ("LOW", "HIGH"):
    print("TABLE {}:".format(table_name))
    query = '''SELECT * FROM ''' + table_name
    viz_tables(thermo_cols, query)

TABLE LOW:


Unnamed: 0,SPECIES_NAME,TLOW,THIGH,COEFF_1,COEFF_2,COEFF_3,COEFF_4,COEFF_5,COEFF_6,COEFF_7
0,O,200,1000,3.168267,-0.003279319,6.643064e-06,-6.128066e-09,2.11266e-12,29122.2592,2.051933
1,O2,200,1000,3.782456,-0.002996734,9.847302e-06,-9.681295e-09,3.243728e-12,-1063.94356,3.657676
2,H,200,1000,2.5,7.053328e-13,-1.99592e-15,2.300816e-18,-9.277323e-22,25473.6599,-0.446683
3,H2,200,1000,2.344331,0.007980521,-1.947815e-05,2.015721e-08,-7.376118e-12,-917.935173,0.68301
4,OH,200,1000,3.992015,-0.002401318,4.617938e-06,-3.881133e-09,1.364115e-12,3615.08056,-0.103925
5,H2O,200,1000,4.198641,-0.002036434,6.520402e-06,-5.487971e-09,1.771978e-12,-30293.7267,-0.849032
6,HO2,200,1000,4.301798,-0.004749121,2.115829e-05,-2.427639e-08,9.292251e-12,294.80804,3.716662
7,H2O2,200,1000,4.276113,-0.0005428224,1.673357e-05,-2.157708e-08,8.624544e-12,-17702.5821,3.435051
8,C,200,1000,2.55424,-0.0003215377,7.337922e-07,-7.322349e-10,2.665214e-13,85443.8832,4.531308
9,CH,200,1000,3.489817,0.0003238355,-1.688991e-06,3.162173e-09,-1.406091e-12,70797.2934,2.084011


TABLE HIGH:


Unnamed: 0,SPECIES_NAME,TLOW,THIGH,COEFF_1,COEFF_2,COEFF_3,COEFF_4,COEFF_5,COEFF_6,COEFF_7
0,O,1000,3500,2.569421,-8.597411e-05,4.194846e-08,-1.001778e-11,1.228337e-15,29217.5791,4.784339
1,O2,1000,3500,3.282538,0.001483088,-7.579667e-07,2.094706e-10,-2.167178e-14,-1088.45772,5.453231
2,H,1000,3500,2.5,-2.30843e-11,1.615619e-14,-4.735152e-18,4.981974000000001e-22,25473.6599,-0.446683
3,H2,1000,3500,3.337279,-4.940247e-05,4.994568e-07,-1.795664e-10,2.002554e-14,-950.158922,-3.205023
4,OH,1000,3500,3.092888,0.0005484297,1.265052e-07,-8.794616e-11,1.174124e-14,3858.657,4.476696
5,H2O,1000,3500,3.033992,0.002176918,-1.640725e-07,-9.704199e-11,1.68201e-14,-30004.2971,4.96677
6,HO2,1000,3500,4.017211,0.00223982,-6.336581e-07,1.142464e-10,-1.079085e-14,111.856713,3.785102
7,H2O2,1000,3500,4.165003,0.004908317,-1.901392e-06,3.71186e-10,-2.879083e-14,-17861.7877,2.916157
8,C,1000,3500,2.492669,4.798893e-05,-7.24335e-08,3.74291e-11,-4.872779e-15,85451.2953,4.801504
9,CH,1000,3500,2.878465,0.0009709137,1.444457e-07,-1.306878e-10,1.760794e-14,71012.4364,5.48498


In [17]:
# Commit the pending INSERTs to the database, then close the connection.
# `cursor` belongs to this connection and must not be used after this cell.
db.commit()
db.close()