In [1]:
# Collect the per-species Arkane thermo libraries and merge them into a single RMG thermo library

In [8]:
import os
import re
import glob
import rmgpy.data.thermo

import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict

In [9]:
# Root directory of the DFT calculations.
DFT_DIR = "/work/westgroup/harris.se/autoscience/autoscience/butane/dft/"

# Find every per-species Arkane output directory. glob.glob() returns paths in
# an arbitrary, filesystem-dependent order, so sort them: the order of this
# list determines the order in which entries are later collected and saved,
# and sorting keeps that order reproducible from run to run.
thermo_libs = sorted(
    glob.glob(os.path.join(DFT_DIR, 'thermo', 'species*', 'arkane', 'RMG_libraries'))
)
print(f'{len(thermo_libs)} thermo libs')

110 thermo libs


In [10]:
# Load the Arkane thermo
# Each path in `thermo_libs` is loaded into its own ThermoDatabase. Every entry
# found under that database's 'thermo' library is tagged with the species index
# parsed from the path, relabeled with the SMILES of its molecule, and appended
# to `entries`.
entries = []
for lib_path in thermo_libs:
    matches = re.search('species_([0-9]{4})', lib_path)
    if matches is None:
        # The 'species*' glob used to find these paths is looser than this
        # regex, so fail with the offending path instead of an opaque
        # "'NoneType' object is not subscriptable" error.
        raise ValueError(
            f'Could not parse a 4-digit species index from {lib_path}'
        )
    species_index = int(matches[1])

    ark_thermo_database = rmgpy.data.thermo.ThermoDatabase()
    ark_thermo_database.load_libraries(lib_path)

    # NOTE(review): relies on the loaded library being registered under the
    # key 'thermo' -- presumably because the file on disk is named thermo.py;
    # confirm against the Arkane output layout.
    for entry in ark_thermo_database.libraries['thermo'].entries.values():
        entry.index = species_index
        entry.label = entry.item.smiles
        entries.append(entry)
print(f'{len(entries)} entries')

110 entries


In [11]:
# Sanity check: compare the total number of SMILES labels with the number of
# distinct ones. The two printed counts match only if no SMILES is repeated.
all_smiles = []
for thermo_entry in entries:
    all_smiles.append(thermo_entry.item.smiles)
print(len(all_smiles))
labels = set(all_smiles)
print(len(labels))

110
110


In [12]:
# Merge every collected entry into one library labeled 'harris_butane', keyed
# by entry label, and register that library in a fresh database under 'thermo'.
ark_thermo_database = rmgpy.data.thermo.ThermoDatabase()
harris_library = rmgpy.data.thermo.ThermoLibrary()
harris_library.label = 'harris_butane'
# Keyed by label, so a repeated label would keep only the last entry seen.
harris_library.entries = OrderedDict(
    (thermo_entry.label, thermo_entry) for thermo_entry in entries
)
ark_thermo_database.libraries['thermo'] = harris_library

In [13]:
# Write the combined library to <DFT_DIR>/thermo/thermo
thermo_root = os.path.join(DFT_DIR, 'thermo')
output_path = os.path.join(thermo_root, 'thermo')
ark_thermo_database.save_libraries(output_path)

In [14]:
# Show how many entries ended up in the combined library
combined_entries = ark_thermo_database.libraries['thermo'].entries
len(combined_entries)

110