In [1]:
# script to compile calculations from dft/kinetics and dft/thermo

In [2]:
import os
import re
import glob
import yaml

import rmgpy.chemkin
import rmgpy.data.kinetics
import rmgpy.data.thermo

import numpy as np
import pandas as pd
# import importlib
# importlib.reload(rmgpy.data.kinetics)

import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict

import sys
# Make the repo-local `database` directory importable so that `database_fun` resolves.
# Reads the AUTOSCIENCE_REPO environment variable; os.environ[...] raises KeyError if it is unset.
sys.path.append(os.path.join(os.environ['AUTOSCIENCE_REPO'], 'database'))
import database_fun

In [3]:
# Root directory of the DFT results inside the repository checkout (<AUTOSCIENCE_REPO>/dft).
# The 'thermo' and 'kinetics' subdirectories are globbed from here further down.
DFT_DIR = os.path.join(os.environ['AUTOSCIENCE_REPO'], 'dft')

# Add thermo

In [5]:
# Build the species whitelist for the thermo library: for each mech_summary CSV,
# walk the rows in file order and stop once ten candidates have been collected.
# Rows whose family is 'species' are always taken as species (the 'possible' flag
# is not consulted for them); any other row is taken as a reaction only when its
# 'possible' flag is truthy.
mech_files = glob.glob('../RMG_example_fuel_*/mech_summary*.csv')
total_include_list = []
total_sp_include_list = []
for mech_file in mech_files:
    include_list, sp_include_list = [], []
    mech_summary = pd.read_csv(mech_file, index_col=0)
    for row in range(len(mech_summary)):
        possible = mech_summary['possible'].values[row]
        family = mech_summary['family'].values[row]
        if family == 'species':
            sp_include_list.append(mech_summary['db_index'].values[row])
        elif possible:
            include_list.append(mech_summary['db_index'].values[row])
        # reactions and species share one budget of ten per summary file
        if len(include_list) + len(sp_include_list) >= 10:
            break

    total_include_list.extend(include_list)
    total_sp_include_list.extend(sp_include_list)
    

In [8]:
# Locate every per-species Arkane output library under <DFT_DIR>/thermo.
thermo_libs = glob.glob(os.path.join(DFT_DIR, 'thermo', 'species*', 'arkane', 'RMG_libraries'))
print(f'{len(thermo_libs)} thermo libs')

# Load each library on its own and gather all of its entries into one flat list.
entries = []
for lib_path in thermo_libs:
    # the four-digit suffix of the 'species_NNNN' directory becomes the entry index
    dir_match = re.search('species_([0-9]{4})', lib_path)
    species_index = int(dir_match[1])

    ark_thermo_database = rmgpy.data.thermo.ThermoDatabase()
    ark_thermo_database.load_libraries(lib_path)

    for entry in ark_thermo_database.libraries['thermo'].entries.values():
        entry.index = species_index
        # relabel by SMILES; the label is later used as the key in the compiled library
        entry.label = entry.item.smiles
        entries.append(entry)
print(f'{len(entries)} entries')

130 thermo libs
130 entries


In [9]:
# Merge the collected entries into one database holding a single library
# labelled 'harris_butane' (registered under the 'thermo' key).
ark_thermo_database = rmgpy.data.thermo.ThermoDatabase()
harris_thermo_library = rmgpy.data.thermo.ThermoLibrary()
harris_thermo_library.label = 'harris_butane'
harris_thermo_library.entries = OrderedDict()
ark_thermo_database.libraries['thermo'] = harris_thermo_library

for entry in entries:
    # rebuild the species from its adjacency list and look up its unique index;
    # only species selected in total_sp_include_list are kept
    candidate = rmgpy.species.Species().from_adjacency_list(entry.item.to_adjacency_list())
    if database_fun.get_unique_species_index(candidate) in total_sp_include_list:
        # keyed by label, so a repeated label replaces the earlier entry
        harris_thermo_library.entries[entry.label] = entry

In [10]:
# save the results
# Writes the compiled thermo database under the relative path 'thermo', so the output location
# depends on the notebook's current working directory. The reload cell reads this same path back.
ark_thermo_database.save_libraries('thermo')

In [11]:
# Round-trip check: read the library that was just written to 'thermo' back from disk
# and report how many entries it contains.
# NOTE: this rebinds ark_thermo_database, replacing the in-memory database compiled earlier.
thermo_lib = 'thermo'
ark_thermo_database = rmgpy.data.thermo.ThermoDatabase()
ark_thermo_database.load_libraries(thermo_lib)
print(f'{len(ark_thermo_database.libraries["harris_butane"].entries)} entries loaded')


0 entries loaded


# Add kinetics

In [12]:
# Build the reaction whitelist for the kinetics library. Each mech_summary CSV is
# walked in file order until ten candidates (reactions plus species) are collected.
mech_files = glob.glob('../RMG_example_fuel_*/mech_summary*.csv')  # names must be formatted to match this search string
total_include_list = []
for mech_file in mech_files:
    include_list, sp_include_list = [], []
    mech_summary = pd.read_csv(mech_file, index_col=0)
    for row in range(len(mech_summary)):
        possible = mech_summary['possible'].values[row]
        family = mech_summary['family'].values[row]
        if family == 'species':
            sp_include_list.append(mech_summary['db_index'].values[row])
        elif possible:
            db_index = mech_summary['db_index'].values[row]
            # a reaction already picked from an earlier summary file shows up again only
            # because it failed, so it must not use a slot in this round's top ten
            if db_index in total_include_list:
                continue
            include_list.append(db_index)
        if len(include_list) + len(sp_include_list) >= 10:
            print('list: ', include_list, sp_include_list)
            break
    total_include_list.extend(include_list)
    

list:  [4733, 4721, 952, 1758, 1692, 869, 870, 1785, 945, 296] []


In [13]:
# Show which mech_summary files the glob above picked up.
mech_files

['../RMG_example_fuel_YYYYMMDD/mech_summary.csv']

In [14]:
# Reaction db_index values selected for the kinetics library.
total_include_list

[4733, 4721, 952, 1758, 1692, 869, 870, 1785, 945, 296]

In [15]:
# Number of selected reactions.
len(total_include_list)

10

In [16]:
# first, get valid kinetics from old workflow
# Every matched 'reaction*/arkane/RMG_libraries' directory is loaded as its own library,
# and only entries whose unique reaction index is in total_include_list are kept.
kinetics_libs = glob.glob(os.path.join(DFT_DIR, 'kinetics', 'reaction*', 'arkane', 'RMG_libraries'))

# Load the Arkane kinetics
entries = []
for lib_path in kinetics_libs:
    # the numeric suffix of the 'reaction_NNNN' directory becomes the entry index
    matches = re.search('reaction_([0-9]{4,6})', lib_path)
    reaction_index = int(matches[1])

    ark_kinetics_database = rmgpy.data.kinetics.KineticsDatabase()
    ark_kinetics_database.load_libraries(lib_path)

    # TODO fix bug related to load_libraries not getting the actual name
    # (the loaded library ends up registered under the empty-string key)
    for entry in ark_kinetics_database.libraries[''].entries.values():
        # check isomorphism with include_list
        idx = database_fun.get_unique_reaction_index(entry.item)
        if idx not in total_include_list:
            # Skip only this entry. The previous `break` also dropped every later
            # entry of the same library without checking it.
            continue

        entry.index = reaction_index
        entries.append(entry)
        print(f'Adding\t{entry.index}\t{entry}')

Adding	296	CH2CHO(21) + PC4H9(182) <=> CH2CO(24) + butane(1)
Adding	4721	HO2(16) + SC4H9(183) <=> O2(2) + butane(1)
Adding	4733	HO2(16) + PC4H9(182) <=> O2(2) + butane(1)


In [17]:
# Merge the selected kinetics entries into one database holding a single library
# named 'harris_butane' (registered under the 'kinetics' key).
ark_kinetics_database = rmgpy.data.kinetics.KineticsDatabase()
harris_kinetics_library = rmgpy.data.kinetics.KineticsLibrary()
harris_kinetics_library.label = 'harris_butane'
harris_kinetics_library.name = 'harris_butane'
harris_kinetics_library.entries = OrderedDict()
ark_kinetics_database.libraries['kinetics'] = harris_kinetics_library

for entry in entries:
    # keyed by label, so a repeated label replaces the earlier entry
    harris_kinetics_library.entries[entry.label] = entry

In [18]:
# save the results
# Alternative kept for reference: write into the DFT tree instead of the working directory.
# output_path = os.path.join(DFT_DIR, 'kinetics')
# ark_kinetics_database.save_libraries(output_path, reindex=False)
# Writes under the relative path 'harris_kinetics' (relative to the notebook's working directory).
# reindex=False is passed so the entry.index values assigned while loading are presumably
# kept as-is; confirm against the RMG-Py save_libraries documentation.
ark_kinetics_database.save_libraries('harris_kinetics', reindex=False)

In [19]:
# Round-trip check: read the library written to 'harris_kinetics' back from disk
# and report how many entries it contains.
# NOTE: this rebinds ark_kinetics_database, replacing the in-memory database compiled earlier.
kinetics_lib = 'harris_kinetics'
ark_kinetics_database = rmgpy.data.kinetics.KineticsDatabase()
ark_kinetics_database.load_libraries(kinetics_lib)
print(f'{len(ark_kinetics_database.libraries["kinetics"].entries)} entries loaded')

3 entries loaded
