In [None]:
# notebook to compile the training reaction entries into a distance database

In [22]:
# imports used throughout the notebook
import os

import numpy as np

import autotst.data.base
import autotst.reaction
from rmgpy import settings
import rmgpy.data.base
import rmgpy.data.kinetics
import rmgpy.data.thermo
import rmgpy.molecule

# import importlib
# importlib.reload(nameOfModule)

In [2]:
# name of the reaction family processed by the rest of the notebook
family_name = "Disproportionation"

In [3]:
# load the list of training reactions with distance data

# Namespaces handed to depo.load(): builtins are disabled in the global
# context and DistanceData is the only name exposed in the local context.
# NOTE(review): presumably these are the namespaces the training file is
# evaluated in -- confirm against TransitionStateDepository.load.
global_context = {'__builtins__': None}
local_context = {'DistanceData': autotst.data.base.DistanceData}

# Root of the AutoTST database. Set the AUTOTST_DATABASE environment variable
# to point at another checkout; the default keeps the original location.
autotst_database_dir = os.environ.get(
    'AUTOTST_DATABASE',
    '/home/moon/autoscience/AutoTST/database',
)
# Build the path from family_name so it cannot drift from the family that is
# loaded from the kinetics database later in the notebook.
training_path = os.path.join(autotst_database_dir, family_name, 'TS_training', 'reactions.py')

depo = autotst.data.base.TransitionStateDepository()
depo.load(training_path, local_context=local_context, global_context=global_context)



In [None]:
# show every training reaction next to its distance data
for training_entry in depo.entries.values():
    print(training_entry.item, training_entry.data)

In [4]:
# Load only the selected kinetics family (no kinetics libraries)
kinetics_db = rmgpy.data.kinetics.KineticsDatabase()
kinetics_path = os.path.join(settings['database.directory'], 'kinetics')
kinetics_db.load(kinetics_path, families=[family_name], libraries=[])


In [None]:
# Load the thermo database, restricted to the primaryThermoLibrary
thermo_database = rmgpy.data.thermo.ThermoDatabase()
thermo_path = os.path.join(settings['database.directory'], 'thermo')
thermo_database.load(thermo_path, libraries=['primaryThermoLibrary'])

In [None]:
# inspect the 'Root' entry of the family's groups
kinetics_db.families[family_name].groups.entries['Root']

In [13]:
# create a dictionary to resemble a parallel structure to the BM tree
distances_tree = {}
for key in kinetics_db.families[family_name].rules.entries:
#     print(kinetics_db.families[family_name].rules.entries[key])
    label = kinetics_db.families[family_name].rules.entries[key][0].label
    distances_tree[label] = []

In [14]:
# list the labels that were collected as keys of distances_tree
distances_tree.keys()

dict_keys(['Root', 'Root_Ext-1R!H-R', 'Root_Ext-2R!H-R', 'Root_4R->H', 'Root_N-4R->H', 'Root_Ext-1R!H-R_4R->O', 'Root_Ext-1R!H-R_N-4R->O', 'Root_Ext-2R!H-R_2R!H->C', 'Root_Ext-2R!H-R_N-2R!H->C', 'Root_4R->H_Sp-2R!H-1R!H', 'Root_4R->H_N-Sp-2R!H-1R!H', 'Root_N-4R->H_4CNOS-u1', 'Root_N-4R->H_N-4CNOS-u1', 'Root_Ext-1R!H-R_4R->O_Ext-4O-R', 'Root_Ext-1R!H-R_4R->O_Sp-5R!H-1R!H', 'Root_Ext-1R!H-R_4R->O_N-Sp-5R!H-1R!H', 'Root_Ext-1R!H-R_N-4R->O_Ext-1R!H-R', 'Root_Ext-1R!H-R_N-4R->O_Sp-5R!H=1R!H', 'Root_Ext-1R!H-R_N-4R->O_N-Sp-5R!H=1R!H', 'Root_Ext-2R!H-R_2R!H->C_4R->C', 'Root_Ext-2R!H-R_2R!H->C_N-4R->C', 'Root_Ext-2R!H-R_N-2R!H->C_4R->H', 'Root_Ext-2R!H-R_N-2R!H->C_N-4R->H', 'Root_4R->H_Sp-2R!H-1R!H_2R!H-u1', 'Root_4R->H_Sp-2R!H-1R!H_N-2R!H-u1', 'Root_4R->H_N-Sp-2R!H-1R!H_1R!H->C', 'Root_4R->H_N-Sp-2R!H-1R!H_N-1R!H->C', 'Root_N-4R->H_4CNOS-u1_1R!H->O', 'Root_N-4R->H_4CNOS-u1_N-1R!H->O', 'Root_N-4R->H_N-4CNOS-u1_1R!H->O', 'Root_N-4R->H_N-4CNOS-u1_N-1R!H->O', 'Root_Ext-1R!H-R_4R->O_Ext-4O-R_Sp-5R

In [16]:
# list stored under the 'Root' label (still empty before the matching loop runs)
distances_tree['Root']

[]

In [17]:
# Loop through all the reactions with distance data and attach each
# reaction's distance data to every tree node that the reaction matches.

# Start from empty lists so that re-running this cell does not append the
# same distance data a second time (duplicates would skew the averages that
# are computed from these lists when the groups file is written).
for node_distances in distances_tree.values():
    node_distances.clear()

family = kinetics_db.families[family_name]
for training_entry in depo.entries.values():
    reaction = training_entry.item

    # find the nodes that reaction matches
    match_tree = family.get_reaction_matches(rxns=[reaction])

    # add the distances to those nodes
    for key, matched_reactions in match_tree.items():
        if matched_reactions:
            # setdefault tolerates a matched label that has no list in
            # distances_tree yet (plain indexing raised KeyError there).
            distances_tree.setdefault(key, []).append(training_entry.data)


In [18]:
# inspect the mapping from node label to the list of collected distance data
distances_tree

{'Root': [DistanceData(distances={'d12': 1.243800,'d13': 2.492170,'d23': 1.254730,}, method='m062x/6-311+G(2df,2p)'),
  DistanceData(distances={'d12': 1.310120,'d13': 2.673870,'d23': 1.367300,}, method='m062x/6-311+G(2df,2p)'),
  DistanceData(distances={'d12': 1.586330,'d13': 2.645110,'d23': 1.063710,}, method='m062x/6-311+G(2df,2p)'),
  DistanceData(distances={'d12': 1.527400,'d13': 2.593570,'d23': 1.084890,}, method='m062x/6-311+G(2df,2p)'),
  DistanceData(distances={'d12': 1.288210,'d13': 2.301340,'d23': 1.013130,}, method='m062x/6-311+G(2df,2p)'),
  DistanceData(distances={'d12': 1.664730,'d13': 2.630110,'d23': 0.999274,}, method='m062x/6-311+g(2df,2p)'),
  DistanceData(distances={'d12': 1.314730,'d13': 2.647930,'d23': 1.341190,}, method='m062x/6-311+g(2df,2p)'),
  DistanceData(distances={'d12': 1.187610,'d13': 2.500860,'d23': 1.334700,}, method='m062x/6-311+g(2df,2p)'),
  DistanceData(distances={'d12': 1.360850,'d13': 2.648300,'d23': 1.363260,}, method='m062x/6-311+g(2df,2p)'),
  

In [53]:
# print the adjacency list of the 'Root' group
print(kinetics_db.families[family_name].groups.entries['Root'].item.to_adjacency_list())

1 *2 R!H u0         {2,[S,D,B]} {3,S}
2 *3 R!H u[1,2]     {1,[S,D,B]}
3 *4 H   u0         {1,S}
4 *1 R   u[1,2,3,4]



In [57]:
# write the groups file

# Header lines of the generated TS_groups.py file.
lines = [
    '#!/usr/bin/env python\n',
    '# encoding: utf-8\n',
    '\n',
    # use family_name so the header always agrees with the family being processed
    f'name = "{family_name}/TS_groups"\n',
    'short_desc = u""\n',
    'long_desc = u""\n',
    '\n',
]

# Distance labels read from each DistanceData, in the order they are written.
distance_keys = ('d12', 'd13', 'd23')


def format_stats(stats):
    """Render a {distance label: value} mapping as a dict literal with double-quoted keys.

    Only the labels in ``distance_keys`` are written, in that order.
    """
    return '{' + ', '.join('"' + name + '": ' + str(stats[name]) for name in distance_keys) + '}'


# Look the groups up once instead of repeating the full attribute chain per field.
group_entries = kinetics_db.families[family_name].groups.entries

for key, datas in distances_tree.items():
    group_entry = group_entries[key]
    index = group_entry.index  # TODO
    label = group_entry.label  # TODO
    group = group_entry.item.to_adjacency_list()

    lines.append('entry(\n')
    lines.append(f'    index = {index},\n')
    lines.append(f'    label = "{label}",\n')
    lines.append('    group = \n')
    lines.append('"""\n')
    lines.append(group + '\n')
    lines.append('""",\n')

    if len(datas) > 0 and datas[0]:
        # One array per distance label, holding that distance for every matched reaction.
        samples = {
            name: np.array([data.distances[name] for data in datas])
            for name in distance_keys
        }
        means = {name: np.average(samples[name]) for name in distance_keys}
        # Report the standard deviation rather than the variance: the variance
        # is in squared length units and is not comparable to the distances.
        spreads = {name: np.std(samples[name]) for name in distance_keys}

        lines.append('    distances = DistanceData(\n')
        lines.append('        distances = ' + format_stats(means) + ',\n')
        lines.append('        uncertainties = ' + format_stats(spreads) + ',\n')
        lines.append('    ),\n\n')
    else:  # no data
        lines.append('    distances = DistanceData(distances={}),\n')

    lines.append(')\n')
    lines.append('\n')

# The header declares utf-8, so write the file with that encoding explicitly.
with open('TS_groups.py', 'w', encoding='utf-8') as f:
    f.writelines(lines)