In [1]:
# This script takes an RMG mechanism and uses the special correlated uncertainty database (rules_node_sensitivity branch)
# to produce the covariance matrix, along with a fake cantera mechanism that also includes training reactions for easy lookup

In [2]:
import json
import pickle
import re

import matplotlib.pyplot as plt
import numpy as np
import rmgpy.tools.uncertainty

In [3]:
# Matches, in priority order: a "name" dictionary key, any complete double-quoted
# string, or a bare nan token. Consuming whole strings before looking for nan
# keeps text inside quoted values (e.g. "nonanal", "rename") from being rewritten.
_SENSITIVITY_TOKEN = re.compile(
    r'(?P<name_key>"name"(?=\s*:))'
    r'|(?P<string>"(?:[^"\\]|\\.)*")'
    r'|(?P<nan>\bnan\b)'
)


def unpack_sensitivity(long_desc):
    """Parse the sensitivity list embedded in an entry's long description.

    Everything after the marker ``'sensitivities = '`` is treated as a
    single-quoted list of dictionaries and converted to JSON before parsing.

    Args:
        long_desc (str): description text that may contain the marker.

    Returns:
        list: the parsed list of dictionaries, or ``[]`` when the marker is
        absent. Each ``'name'`` key is renamed to ``'training_rxn_name'`` and
        each bare ``nan`` value is replaced by the string ``"-9999999"``.

    Raises:
        json.JSONDecodeError: if the text after the marker is not valid JSON
        once the substitutions have been applied.
    """
    start_str = 'sensitivities = '
    marker_index = long_desc.find(start_str)
    if marker_index == -1:
        return []
    sensitivities_str = long_desc[marker_index + len(start_str):].replace("'", '"')

    def _rewrite(match):
        # Only the key "name" and bare nan tokens are rewritten; every other
        # quoted string is passed through untouched.
        if match.lastgroup == 'name_key':
            return '"training_rxn_name"'
        if match.lastgroup == 'nan':
            return '"-9999999"'
        return match.group(0)

    return json.loads(_SENSITIVITY_TOKEN.sub(_rewrite, sensitivities_str))

# Load the RMG Model

In [4]:
# load the model
# Relative paths to the annotated Chemkin mechanism and its species dictionary.
chemkin = '../butane/chem_annotated.inp'
species_dict = '../butane/species_dictionary.txt'

# The Uncertainty object is created with output_directory='rmg_uncertainty'
# and is the shared state that every later cell reads from.
uncertainty = rmgpy.tools.uncertainty.Uncertainty(output_directory='rmg_uncertainty')
uncertainty.load_model(chemkin, species_dict)




# TODO - force the user to provide the input file used to generate the mechanism to ensure databases are really the same
# load the database
# --------------- CAUTION!!! Databases here must match the ones used to generate the mechanism
# note - this cell stalls out on Discovery
# Thermo libraries passed to load_database as thermo_libraries.
thermo_libs = [
    'BurkeH2O2',
    'primaryThermoLibrary',
    'FFCM1(-)',
    'CurranPentane',
    'Klippenstein_Glarborg2016',
    'thermo_DFT_CCSDTF12_BAC',
    'DFT_QCI_thermo',
    'CBS_QB3_1dHR',
]

# Kinetics libraries passed to load_database as reaction_libraries.
kinetic_libs = [
    'FFCM1(-)',
    'CurranPentane',
    'combustion_core/version5',
    'Klippenstein_Glarborg2016',
    'BurkeH2O2inArHe',
    'BurkeH2O2inN2',
]
# Families are requested as 'default' and only the 'training' depository is loaded.
uncertainty.load_database(
    thermo_libraries=thermo_libs,
    kinetics_families='default',
    reaction_libraries=kinetic_libs,
    kinetics_depositories=['training'],
)




In [5]:
# Get the different kinetic and thermo sources
# Later cells read uncertainty.reaction_sources_dict, which is presumably filled in here — TODO confirm.
uncertainty.extract_sources_from_model()
# Called with default arguments here; a later cell repeats this step with correlated=True.
uncertainty.assign_parameter_uncertainties()

In [6]:
# Create a giant dictionary with all of the reaction family information in it.
# For every auto-generated family <name>, three keys are stored:
#   <name>          -> the entries returned by the family's rules.get_entries()
#   <name>_labels   -> the labels of those entries, in the same order
#   <name>_rxn_map  -> the result of get_reaction_matches(), indexed by node label in later cells
# The two Intra_R_Add families are skipped; the reason is not recorded in this notebook.
skipped_families = ('Intra_R_Add_Endocyclic', 'Intra_R_Add_Exocyclic')

auto_gen_families = {}
for family_name, family in uncertainty.database.kinetics.families.items():
    if family_name in skipped_families:
        continue
    if not family.auto_generated or family_name in auto_gen_families:
        continue

    # Fetch the entries once and reuse them for both the entry list and the labels.
    rule_entries = family.rules.get_entries()
    auto_gen_families[family_name] = rule_entries
    auto_gen_families[f'{family_name}_labels'] = [entry.label for entry in rule_entries]
    auto_gen_families[f'{family_name}_rxn_map'] = family.get_reaction_matches(
        thermo_database=uncertainty.database.thermo,
        remove_degeneracy=True,
        get_reverse=True,
        exact_matches_only=False,
        fix_labels=True)

In [7]:
# put the autogenerated tree info into the kinetic sources dictionary
# Only reactions with a 'Rate Rules' source and a non-empty 'node' are touched.
for rxn in uncertainty.reaction_list:
    rxn_sources = uncertainty.reaction_sources_dict[rxn]
    if 'Rate Rules' not in rxn_sources:
        continue
    if not rxn_sources['Rate Rules'][1]['node']:
        continue

    node_name = rxn_sources['Rate Rules'][1]['node']
    training_reactions = auto_gen_families[f'{rxn.family}_rxn_map'][node_name]

    # TODO use sensitivity instead of equal weight
    # Every training reaction at the node gets the same weight, 1/N.
    w = 1.0 / len(training_reactions)

    # Each stored tuple is (reaction, reaction, weight).
    rxn_sources['Rate Rules'][1]['training'] = [(x, x, w) for x in training_reactions]

In [8]:
# Define Uncertainty Constants
k_nonexact = 3.5
k_rule = 0.5
k_family = 1.0
k_library = 0.5
k_train = 0.5

In [9]:
def _training_entries(source):
    """Return the (reaction, reaction, weight) tuples for one source dictionary, or [] if it has none."""
    if 'Rate Rules' in source:
        # 'training' is only attached to rate-rule sources that have a tree node,
        # so fall back to an empty list when the key is missing.
        return source['Rate Rules'][1].get('training', [])
    if 'Training' in source:
        training_rxn = source['Training'][1].item
        return [(training_rxn, training_rxn, 1.0)]
    # Any other source type (e.g. 'Library', 'PDep') carries no training reactions.
    return []


def get_intrinsic_correlation(source1, source2, k_rule_override=None):
    """Sum the weighted overlap in training reactions between two reaction sources.

    For every training reaction of ``source1`` that also appears in
    ``source2``, ``weight1 * weight2 * k_rule`` is added to the result. Only
    the first match in ``source2`` is counted for each entry of ``source1``.

    Args:
        source1 (dict): source dictionary of the first reaction. A
            'Rate Rules' source uses its 'training' list; a 'Training' source
            contributes its single reaction with weight 1.0.
        source2 (dict): source dictionary of the second reaction, same layout.
        k_rule_override (float, optional): value used in place of the
            module-level ``k_rule``. Defaults to None, meaning ``k_rule``.

    Returns:
        The accumulated correlation; 0 when either source has no training
        reactions (previously this case raised UnboundLocalError or KeyError).
    """
    rule_constant = k_rule if k_rule_override is None else k_rule_override

    training1 = _training_entries(source1)
    training2 = _training_entries(source2)

    correlation = 0
    for entry_i in training1:
        for entry_j in training2:
            if entry_i[0] == entry_j[0]:
                correlation += entry_i[2] * entry_j[2] * rule_constant
                break
    return correlation

In [None]:
# Spot-check the intrinsic correlation for one pair of reactions.
# The indices are hard-coded positions in uncertainty.reaction_list, so they are
# presumably only meaningful for the mechanism loaded above — verify before reuse.
# i = 370
# j = 1289

i = 296
j = 257
get_intrinsic_correlation(
    uncertainty.reaction_sources_dict[uncertainty.reaction_list[i]],
    uncertainty.reaction_sources_dict[uncertainty.reaction_list[j]]
)

In [10]:
# Construct the covariance matrix
# Sigma_k is square with one row/column per entry of uncertainty.reaction_list.
# Only the lower triangle is visited; off-diagonal values are mirrored.
reactions = uncertainty.reaction_list
n_reactions = len(reactions)
Sigma_k = np.zeros((n_reactions, n_reactions))

for i in range(n_reactions):
    source_entry_i = uncertainty.reaction_sources_dict[reactions[i]]
    for j in range(i + 1):
        source_entry_j = uncertainty.reaction_sources_dict[reactions[j]]

        if i != j:  # off-diagonals
            # Library and PDep reactions are treated as uncorrelated with everything else
            pair = (source_entry_i, source_entry_j)
            if any(kind in entry for kind in ('Library', 'PDep') for entry in pair):
                continue

            # Reactions from different families are treated as uncorrelated
            if reactions[i].family != reactions[j].family:
                continue

            intrinsic_corr = get_intrinsic_correlation(source_entry_i, source_entry_j)
            correlation = intrinsic_corr + k_family
            Sigma_k[i, j] = correlation
            Sigma_k[j, i] = correlation
            continue

        # Diagonal entry: the value depends on the kind of source
        if 'Library' in source_entry_i or 'PDep' in source_entry_i:
            Sigma_k[i, i] = k_library
        elif 'Training' in source_entry_i:
            Sigma_k[i, i] = k_train
        elif 'Rate Rules' in source_entry_i:
            N = len(source_entry_i['Rate Rules'][1]['training'])
            nonexact_term = np.float_power(np.log10(N + 1), 2.0) * k_nonexact
            Sigma_k[i, i] = get_intrinsic_correlation(source_entry_i, source_entry_j) + k_family + nonexact_term
        else:
            raise NotImplementedError

In [11]:
import pickle

In [12]:
# Persist Sigma_k to disk.
# NOTE(review): the file is named "correlation_matrix" while the cell that builds
# Sigma_k calls it a covariance matrix — confirm which is intended.
with open('correlation_matrix.pickle', 'wb') as handle:
    pickle.dump(Sigma_k, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [13]:
# Quick visual check of Sigma_k as an image (no colorbar, title or axis labels are set here).
plt.imshow(Sigma_k)

<matplotlib.image.AxesImage at 0x7fcf67d5b650>

In [None]:
# Largest entry of Sigma_k; np.max with no axis already reduces over the whole array,
# so a second np.max around it is unnecessary.
np.max(Sigma_k)

In [None]:
# Sum of every entry of Sigma_k; np.sum with no axis already reduces over the whole array.
np.sum(Sigma_k)

In [None]:
# Sum of every entry of Sigma_k (same check as the previous cell);
# np.sum with no axis already reduces over the whole array.
np.sum(Sigma_k)

In [None]:
# Exploratory: show the source dictionary of the first reaction in the list.
uncertainty.reaction_sources_dict[uncertainty.reaction_list[0]]

In [None]:
# Print the index and source dictionary of every reaction that has a 'Training' source.
for i, rxn in enumerate(uncertainty.reaction_list):
    rxn_sources = uncertainty.reaction_sources_dict[rxn]
    if 'Training' in rxn_sources:
        print(i, rxn_sources)

In [None]:
# Exploratory: show the .item of the 'Training' source for reaction 211.
# The index is hard-coded; this raises KeyError if that reaction has no 'Training' source.
uncertainty.reaction_sources_dict[uncertainty.reaction_list[211]]['Training'][1].item

In [None]:
# NOTE(review): this cell is identical to the earlier cell that prints every
# reaction with a 'Training' source; it can be removed.
for i in range(0, len(uncertainty.reaction_list)):
    if 'Training' in uncertainty.reaction_sources_dict[uncertainty.reaction_list[i]]:
        print(i,uncertainty.reaction_sources_dict[uncertainty.reaction_list[i]])

In [1]:
# plot the correlation matrix
# Build index grids with one point per reaction along each axis.
# Needs numpy (np) and the uncertainty object from earlier cells.
n_reactions = len(uncertainty.reaction_list)
x = np.arange(n_reactions)
y = np.arange(n_reactions)
X, Y = np.meshgrid(x, y)

NameError: name 'np' is not defined

In [None]:
# Pseudocolor plot of Sigma_k on the X/Y index grids built in the previous cell.
plt.pcolor(X,Y, Sigma_k)

In [None]:
# NOTE(review): this cell repeats the earlier "put the autogenerated tree info into the
# kinetic sources dictionary" cell and overwrites the 'training' entries that cell wrote.
for rxn in uncertainty.reaction_list:
    if 'Rate Rules' in uncertainty.reaction_sources_dict[rxn] and uncertainty.reaction_sources_dict[rxn]['Rate Rules'][1]['node']:
        node_name = uncertainty.reaction_sources_dict[rxn]['Rate Rules'][1]['node']
#         print(node_name)
#         print(uncertainty.reaction_sources_dict[rxn])

        # store the result in uncertainty.reaction_sources_dict[rxn]['Rate Rules'][1]['training']
        # number of training reactions
        training_reactions = auto_gen_families[f'{rxn.family}_rxn_map'][node_name]
        
        # TODO use sensitivity instead of equal weight
        # Every training reaction at the node gets the same weight, 1/N.
        w = 1.0 / len(training_reactions)
        
        # Each stored tuple is (reaction, reaction, weight).
        uncertainty.reaction_sources_dict[rxn]['Rate Rules'][1]['training'] = [(x, x, w) for x in training_reactions]

In [None]:
# Exploratory: show what the Disproportionation reaction map holds for the 'Root' node.
# (Plain string key: the f-prefix had no placeholders.)
auto_gen_families['Disproportionation_rxn_map']['Root']

In [None]:
# Parse the sensitivity list from the first Disproportionation rule entry's long description.
sens = unpack_sensitivity(auto_gen_families['Disproportionation'][0].long_desc)

In [None]:
# Count the Disproportionation rule entries whose dA sensitivities do not sum
# (after rounding) to 1, printing the dA and dA_dEa sums for each such entry.
weird_count = 0
for entry in auto_gen_families['Disproportionation']:
    sens = unpack_sensitivity(entry.long_desc)
    # float() because unpack_sensitivity returns the string "-9999999" in place of nan,
    # and np.sum cannot reduce a list that mixes floats and strings.
    dAs = [float(x['dA']) for x in sens]
    if np.round(np.sum(dAs)) != 1:
        weird_count += 1
        # Key name taken from the line that was previously commented out; leaving it
        # commented made the print below fail with NameError on dEAs.
        dEAs = [float(x['dA_dEa']) for x in sens]
        print(np.sum(dAs), np.sum(dEAs))

print()
print(weird_count)

In [None]:
# Collect the 'dA' value from every record in sens (sens is set by an earlier cell).
dAs = [x['dA'] for x in sens]

In [None]:
# Exploratory: total of the dA values collected in the previous cell.
np.sum(dAs)

In [None]:
# Exploratory: show the second sensitivity record.
sens[1]

In [None]:
# Exploratory: size of the Disproportionation reaction-map entry for one specific tree node.
len(auto_gen_families['Disproportionation_rxn_map']['Root_Ext-2R!H-R_2R!H->C_N-4R->C'])

In [None]:
# Save the current kinetic input uncertainties to 'uncorrelated_var.pickle'.
# This runs before the cell that calls assign_parameter_uncertainties(correlated=True).
with open('uncorrelated_var.pickle', 'wb') as handle:
    pickle.dump(uncertainty.kinetic_input_uncertainties, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Re-extract the sources and assign parameter uncertainties again, this time with correlated=True.
# NOTE(review): re-extracting may discard the 'training' lists attached by an earlier cell — confirm.
uncertainty.extract_sources_from_model()
uncertainty.assign_parameter_uncertainties(correlated=True)

In [None]:
# Save the current kinetic input uncertainties to 'correlated_var.pickle'.
# This runs after the cell that calls assign_parameter_uncertainties(correlated=True).
with open('correlated_var.pickle', 'wb') as handle:
    pickle.dump(uncertainty.kinetic_input_uncertainties, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:

# Ask the uncertainty object for its own covariance matrix and the matching labels.
# This is separate from the hand-built Sigma_k above.
Sigma, labels = uncertainty.get_uncertainty_covariance_matrix()

In [None]:
# Exploratory: display the matrix returned by get_uncertainty_covariance_matrix().
Sigma

In [None]:
# NOTE(review): `uncorrelated` is only assigned by the later cell that loads
# 'uncorrelated_var.pickle'; on a top-to-bottom run this line raises NameError.
uncorrelated

In [None]:
# Print reaction r and, if it has a 'Rate Rules' source, each of its training tuples.
# r is a hard-coded index into uncertainty.reaction_list and is reused by later cells.
r = 1289
print(uncertainty.reaction_list[r])
# print(uncertainty.kinetic_input_uncertainties[r])
try:
    # d is not used afterwards, but a missing 'degeneracy' key raises KeyError here
    # and therefore also skips the printing below.
    d = uncertainty.reaction_sources_dict[uncertainty.reaction_list[r]]['Rate Rules'][1]['degeneracy']
    training = uncertainty.reaction_sources_dict[uncertainty.reaction_list[r]]['Rate Rules'][1]['training']
    for item in training:
        print(item)
#     print(training)
except KeyError:
    # Deliberately silent: the reaction lacks one of the keys looked up above.
    pass
# print(uncertainty.reaction_sources_dict[uncertainty.reaction_list[r]])

In [None]:
# Exploratory: .data of the first element of the first training tuple.
# `training` is set by the cell that inspects reaction r, and only if no KeyError occurred there.
training[0][0].data

In [None]:
# Exploratory: .data of the second element of the first training tuple.
training[0][1].data

In [None]:
# Exploratory: list the attribute names of the first element of the first training tuple.
dir(training[0][0])

In [None]:
# Exploratory: show the class of the first element of the first training tuple.
type(training[0][0])

In [None]:
# look at reactions 370 and 1289 (the original note ends here);
# this shows the kinetic input uncertainty for index r, which is set in an earlier cell
uncertainty.kinetic_input_uncertainties[r]

In [None]:
# Exploratory: show the source dictionary of reaction 370 (hard-coded index).
uncertainty.reaction_sources_dict[uncertainty.reaction_list[370]]

In [None]:
# Print the position in uncertainty.species_list of every species whose first
# molecule has the SMILES "CCO". enumerate supplies the position directly, so
# the list is not searched again with .index() for each match.
for sp_index, sp in enumerate(uncertainty.species_list):
    if sp.molecule[0].smiles == "CCO":
        print(sp_index)

In [None]:
# Exploratory: show species 110 (hard-coded index), displayed inside a one-element list.
[uncertainty.species_list[110]]

In [None]:
# Exploratory: list the attribute names available on the uncertainty object.
dir(uncertainty)

In [None]:
# Exploratory: display the full species sources dictionary (output may be large).
uncertainty.species_sources_dict

In [None]:
# Exploratory: display every kinetic input uncertainty (output may be large).
uncertainty.kinetic_input_uncertainties

In [None]:
# Exploratory: list the attribute names available on the uncertainty object
# (same call as an earlier cell).
dir(uncertainty)

In [None]:
# Print an uncertainty entry once for each of its keys that mentions 'Disproportionation'.
# Each entry must provide .keys(), so this presumably expects the correlated form — verify.
for partials in uncertainty.kinetic_input_uncertainties:
    for label in partials.keys():
        if 'Disproportionation' not in label:
            continue
        print(partials)

In [None]:
# Exploratory: number of reactions in the loaded model.
len(uncertainty.reaction_list)

In [None]:
# Map reaction index -> keys of its kinetic input uncertainty, for H_Abstraction reactions only.
my_dict = {}

for i, rxn in enumerate(uncertainty.reaction_list):
    try:
        if rxn.family == 'H_Abstraction':
            print(i, rxn)
            my_dict[i] = uncertainty.kinetic_input_uncertainties[i].keys()
    except AttributeError:
        # Silently skips anything that raises AttributeError inside the try, e.g. a
        # reaction without .family or an uncertainty entry without .keys().
        pass

In [None]:
# Exploratory: show the kinetic input uncertainty of reaction 1289 (hard-coded index).
uncertainty.kinetic_input_uncertainties[1289]

In [None]:
# Print the index of every H_Abstraction reaction whose uncertainty keys include this rule label.
# Raw string: the label contains literal backslashes, and "\H" is not a valid escape
# sequence in a normal string literal.
target_label = r'H_Abstraction C/H3/Ct;C_rad/H/Cs\H3/Cs\H3'
for key1, uncertainty_keys in my_dict.items():
    if target_label in uncertainty_keys:
        print(key1)
    

In [None]:
# Save the current kinetic input uncertainties to 'correlated.pickle'.
# NOTE(review): an earlier cell writes the same attribute to 'correlated_var.pickle',
# and only that file is read back later — confirm this second file is needed.
with open('correlated.pickle', 'wb') as handle:
    pickle.dump(uncertainty.kinetic_input_uncertainties, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Exploratory: show the source dictionary of reaction 1289 (hard-coded index).
uncertainty.reaction_sources_dict[uncertainty.reaction_list[1289]]

In [None]:
# Reload the two uncertainty snapshots written by earlier cells of this notebook.
# pickle.load executes arbitrary code from the file, so only load files you produced yourself.
with open('uncorrelated_var.pickle', 'rb') as handle:
    uncorrelated = pickle.load(handle)
with open('correlated_var.pickle', 'rb') as handle:
    correlated = pickle.load(handle)

In [None]:
# For every reaction, add up the values of its correlated entry and report the
# reactions where that total differs from the uncorrelated value.
# NOTE(review): the comparison is an exact float inequality, so round-off alone
# can flag a reaction — confirm whether a tolerance is wanted.
for f, partials in enumerate(correlated):
    total = sum(partials[label] for label in partials)
    if total != uncorrelated[f]:
        print(uncertainty.reaction_list[f].family)
        print(f, total, uncorrelated[f])

In [None]:
# Print each value stored for reaction 1289 in the correlated data, then their total.
entry_1289 = correlated[1289]
total = 0
for label in entry_1289:
    contribution = entry_1289[label]
    total += contribution
    print(contribution)

print()
print(total)

In [None]:
# Exploratory: show the uncorrelated value for reaction 1289, for comparison with the total above.
uncorrelated[1289]

In [None]:
# Exploratory: list the attribute names available on the uncertainty object
# (same call as earlier cells).
dir(uncertainty)