In [1]:
import pandas as pd
import os
import numpy as np
import re
from typing import List
import math
from scipy.interpolate import BPoly
import matplotlib.pyplot as plt

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
%load_ext autoreload
%autoreload 2

params = {"font.family": "Arial", 'mathtext.default': 'regular'}      
plt.rc('font', size=8)
plt.rcParams.update(params)
plt.rcParams['axes.facecolor'] = 'white'

%matplotlib inline

In [2]:
def get_second_order_barrier_correction(concentration, temperature):
    return math.log(1/concentration)*temperature*8.314/1000/4.184

def split_filename(s):
    match = re.match(r'([^-]+-[^-]+)-(.*)', s)
    if match:
        return match.groups()
    else:
        return s, ''

def get_energy(df, structure_name):
    return df[df['structure'] == structure_name]['qh-G(T)_SPC'].values[0]

def get_reference_gibbs_energy(df, structure_name):
    return df[df['label_name'] == structure_name]['reference_gibbs'].values[0]

def get_reactant_energy(structure_name, functionalization=""):
    if "naph-" in structure_name:
        # remove -c or -t from structure_name
        structure_name = structure_name.split("-")[0]
    return base_reactants_df[base_reactants_df['structure'] == f"{structure_name}"]['qh-G(T)_SPC'].values[0]

def get_ligand_energy(df, ligand):
    return df[(df['group'] == ligand) & (df['structure'] == '99-lig')]['qh-G(T)_SPC'].values[0]

def get_reference_energy(df, ligand, functionalization=None):
    if not functionalization:
        return df[(df['structure'] == '00-lpdoh2') & (df['group'] == ligand)]['equalized_gibbs'].values[0]
    else:
        return df[(df['structure'] == '00-lpdoh2') & (df['group'] == ligand) & (df['functionalization'] == functionalization)]['equalized_gibbs'].values[0]

def get_ester_reference_energy(df, ligand, functionalization=None):
    if not functionalization:
        return df[(df['structure'] == 'e00-lpdoh2') & (df['group'] == ligand)]['equalized_gibbs'].values[0]
    else:
        return df[(df['structure'] == 'e00-lpdoh2') & (df['group'] == ligand) & (df['functionalization'] == functionalization)]['equalized_gibbs'].values[0]

def get_precatalyst_energy(df, ligand):
    return (get_energy(df, "100-l2pd2oh4") - 2*get_ligand_energy(ligands_df, ligand))

def equalize_reference(row, ligand, functionalization=""):
    if row['structure'] == '00-lpdoh2':
        return row['qh-G(T)_SPC'] / 2 + 2*get_energy(base_reactants_df, f'{functionalization}naphboh2') + 2*get_energy(base_reactants_df, 'h2o') + get_ligand_energy(ligands_df, ligand)
    
    elif row['structure'] in ['01-rxt', '02-ts-rxt-c1', '03-c1', '04-ts-c1-c2', '05-c2',
                            '12-ts-rxt-t1', '13-t1', '14-ts-t1-t2', '15-t2', 
                            '27-ts-t2-xa',  '30-ts-t2-xb',  
                            '33-ts-rxt-ya', '34-ya', '35-ts-ya-c1', '58-c2alt',
                            '63-t3ob', '64-ts-t3ob-t4ob', '65-t4ob', '66-ts-t2-t4ob', '73-ts-t4ob-t5ob', '74-t5ob-rh'
                            ]:
        return row['qh-G(T)_SPC'] + 2*get_energy(base_reactants_df, 'h2o') + get_energy(base_reactants_df, f'{functionalization}naphboh2') + get_ligand_energy(ligands_df, ligand)
    
    elif row['structure'] in ['06-c2-h2o', '07-ts-c2-c3', '08-c3-boh3',
                            '16-t2-h2o', '17-ts-t2-t3', '18-t3-boh3',
                            '22-c5', '23-ts-c5-c6', '24-t5', '25-ts-t5-t6',
                            '36-ts-rxt-yb', '37-yb', '38-ts-yb-c1', '26-t6',
                            '62-t3ob-h2o', '67-t4ob-h2o', '68-ts-t4ob-t4-4mps', '69-t4-boh3', '82-ts-t2-t3-4mps'
                            ]:
        return row['qh-G(T)_SPC'] + get_energy(base_reactants_df, f'{functionalization}naphboh2') + get_ligand_energy(ligands_df, ligand) + get_energy(base_reactants_df, 'h2o')
    
    elif row['structure'] in ['28-xa', '31-xb',]:
        return row['qh-G(T)_SPC'] + get_energy(base_reactants_df, 'boh3') + 2*get_energy(base_reactants_df, 'h2o') + get_ligand_energy(ligands_df, ligand) + get_energy(base_reactants_df, f'{functionalization}naphboh2')

    elif row['structure'] in ['29-ts-xa-t3', '32-ts-xb-t3',]:
        return row['qh-G(T)_SPC'] + get_energy(base_reactants_df, f'{functionalization}naphboh2') + get_energy(base_reactants_df, 'boh3') + get_ligand_energy(ligands_df, ligand) + get_energy(base_reactants_df, 'h2o')
    
    elif row['structure'] in ['59-t2-2h2o', '60-ts-t2-t3ob', '61-t3ob-2h2o', '70-t4ob-2h2o', '71-ts-t4ob-t4-6mps', '72-t4-boh3h2o', '83-ts-t2-t3-6mps', '84-t3-boh3h2o']:
        return row['qh-G(T)_SPC'] + get_energy(base_reactants_df, f'{functionalization}naphboh2') + get_ligand_energy(ligands_df, ligand)

    elif row['structure'] in ['09-c3', '10-ts-c3-c4', '11-c4',
                            '19-t3', '20-ts-t3-t4', '21-t4',
                            ]:
        return row['qh-G(T)_SPC'] + get_energy(base_reactants_df, 'boh3') + get_energy(base_reactants_df, f'{functionalization}naphboh2') + get_ligand_energy(ligands_df, ligand) + get_energy(base_reactants_df, 'h2o')
    
    elif row['structure'] in ['39-t2-lig', '40-ts-t2-p1', '41-p1-boh3',]:
        return row['qh-G(T)_SPC'] + 2*get_energy(base_reactants_df, 'h2o') + get_energy(base_reactants_df, f'{functionalization}naphboh2')
    
    elif row['structure'] in ['42-p1']:
        return row['qh-G(T)_SPC'] + get_energy(base_reactants_df, 'boh3') + 2*get_energy(base_reactants_df, 'h2o') + get_energy(base_reactants_df, f'{functionalization}naphboh2')

    elif row['structure'] in ['43-t3-lig', '44-ts-t3-p1', '45-p1-h2o',]:
        return row['qh-G(T)_SPC'] + get_energy(base_reactants_df, 'h2o') + get_energy(base_reactants_df, f'{functionalization}naphboh2') + get_energy(base_reactants_df, 'boh3')
    
    elif row['structure'] in ['75-t5ob',]:
        return row['qh-G(T)_SPC'] + get_energy(base_reactants_df, f'{functionalization}naph') + 2*get_energy(base_reactants_df, 'h2o') + get_energy(base_reactants_df, f'{functionalization}naphboh2') + get_ligand_energy(ligands_df, ligand)
    
    elif row['structure'] in ['76-t5ob-1h2o', '77-ts-t5ob-t6ob-4mps', '78-t6ob']:
        return row['qh-G(T)_SPC'] + get_energy(base_reactants_df, f'{functionalization}naph') + get_energy(base_reactants_df,  f'{functionalization}naphboh2') + get_ligand_energy(ligands_df, ligand) + get_energy(base_reactants_df, 'h2o')
    
    elif row['structure'] in ['79-t5ob-2h2o', '80-ts-t5ob-t6ob-6mps', '81-t6ob-1h2o']:
        return row['qh-G(T)_SPC'] + get_energy(base_reactants_df, f'{functionalization}naph') + get_energy(base_reactants_df,  f'{functionalization}naphboh2') + get_ligand_energy(ligands_df, ligand)
    elif row['structure'] in ['85-c2dim']:
        return row['qh-G(T)_SPC']/2 + 2*get_energy(base_reactants_df, 'h2o') + get_energy(base_reactants_df,  f'{functionalization}naphboh2') + get_ligand_energy(ligands_df, ligand) + get_energy(base_reactants_df, 'boh3')
    elif row['structure'] in ['86-c2-naphboh2', '87-ts-c2-naphboh2-hc1', '88-hc1-boh3']:
        return row['qh-G(T)_SPC'] + 2*get_energy(base_reactants_df, 'h2o') + get_ligand_energy(ligands_df, ligand)
    elif row['structure'] in ['89-hc1', '90-ts-hc1-hc2', '91-hc2', '92-ts-hc2-hc3', '93-hc3', '94-ts-hc3-pd0boh3']:
        return row['qh-G(T)_SPC'] + 2*get_energy(base_reactants_df, 'h2o') + get_ligand_energy(ligands_df, ligand) + get_energy(base_reactants_df, 'boh3')
    elif row['structure'] in ['95-lpd0boh3']:
        return row['qh-G(T)_SPC'] + 2*get_energy(base_reactants_df, 'h2o') + get_ligand_energy(ligands_df, ligand) + get_energy(base_reactants_df, 'boh3') + get_energy(base_reactants_df, 'binaph')
    elif row['structure'] in ['96-hc4', '97-ts-hc4-lpd0']:
        return row['qh-G(T)_SPC'] + 2*get_energy(base_reactants_df, 'h2o') + get_ligand_energy(ligands_df, ligand) + 2*get_energy(base_reactants_df, 'boh3')
    elif row['structure'] in ['98-lpd0']:
        return row['qh-G(T)_SPC'] + 2*get_energy(base_reactants_df, 'h2o') + get_ligand_energy(ligands_df, ligand) + 2*get_energy(base_reactants_df, 'boh3') + get_energy(base_reactants_df, 'binaph')
    elif row['structure'] == 'e00-lpdoh2':
        return row['qh-G(T)_SPC'] / 2 + 2*get_energy(base_reactants_df,  f'{functionalization}naphbpin') + 2*get_energy(base_reactants_df, 'h2o') + get_ligand_energy(ligands_df, ligand)
    
    elif row['structure'] in ['e01-ecrxt', 'e02-ts-ecrxt-ec1', 'e03-ec1', 'e04-ts-ec1-ec2', 'e05-ec2',
                            'e12-etrxt', 'e13-ts-etrxt-et1', 'e14-et1', 'e15-ts-et1-et2', 'e16-et2', 
                            'e23-ts-etrxt-ec1b', 'e24-ec1b', 'e25-ts-ec1b-ec2b', 'e26-ec2b', 'e30-ts-ecrxt-et1b', 'e31-et1b', 'e32-ts-et1b-et2b', 'e33-et2b',
                            ]:
        return row['qh-G(T)_SPC'] + 2*get_energy(base_reactants_df, 'h2o') + get_energy(base_reactants_df, f'{functionalization}naphbpin') + get_ligand_energy(ligands_df, ligand)
    
    elif row['structure'] in ['e06-ec2-h2o', 'e07-ts-ec2-ec3', 'e08-ec3-bpinoh',
                            'e17-et2-h2o', 'e18-ts-et2-et3', 'e19-et3-bpinoh',
                            'e27-ec2b-h2o', 'e28-ts-ec2b-ec3b', 'e29-ec3b-bpinoh', 'e34-et2b-h2o', 'e35-ts-et2b-et3b', 'e36-et3b-bpinoh',
                            ]:
        return row['qh-G(T)_SPC'] + get_energy(base_reactants_df, f'{functionalization}naphbpin') + get_ligand_energy(ligands_df, ligand) + get_energy(base_reactants_df, 'h2o')
    elif row['structure'] in ['e09-ec3', 'e10-ts-ec3-ec4', 'e11-ec4',
                            'e20-et3', 'e21-ts-et3-et4', 'e22-et4',
                            ]:
        return row['qh-G(T)_SPC'] + get_energy(base_reactants_df, 'bpinoh') + get_energy(base_reactants_df, f'{functionalization}naphbpin') + get_ligand_energy(ligands_df, ligand) + get_energy(base_reactants_df, 'h2o')
    else:
        print(f"Messed up {row}")

def get_main_microkinetics_transformations(type_name="main", functionalization=""):
    int_list = ['00-lpdoh2', '01-rxt', '03-c1', '05-c2', '09-c3', '11-c4', '13-t1', '15-t2', '19-t3', '21-t4', '100-l2pd2oh4']
    ts_list = ['02-ts-rxt-c1', '04-ts-c1-c2', '07-ts-c2-c3', '10-ts-c3-c4', '12-ts-rxt-t1', '14-ts-t1-t2', '17-ts-t2-t3', '20-ts-t3-t4', ]    
    rxt_pdt_list = [f'{functionalization}naphboh2', 'h2o', 'boh3', f'{functionalization}naph-c', f'{functionalization}naph-t']
    diffusion_ts_list = ['diffusion-l2pd2oh4-lpdoh2', 'diffusion-lpdoh2-rxt', 'diffusion-c4-pdt', 'diffusion-t4-pdt']
    xform_dict = {'diffusion-l2pd2oh4-lpdoh2': ['100-l2pd2oh4', '', '00-lpdoh2', '00-lpdoh2', True],
                    'diffusion-lpdoh2-rxt': ['00-lpdoh2', f'{functionalization}naphboh2', '01-rxt', '', True],
                    '02-ts-rxt-c1': ['01-rxt', '', '03-c1', '', True],
                    '04-ts-c1-c2': ['03-c1', '', '05-c2', '', True],
                    '07-ts-c2-c3': ['05-c2', 'h2o', '09-c3', 'boh3', True],
                    '10-ts-c3-c4': ['09-c3', '', '11-c4', '', True],
                    'diffusion-c4-pdt': ['11-c4', '', f'{functionalization}naph-c', '00-lpdoh2', True],
                    '12-ts-rxt-t1': ['01-rxt', '', '13-t1', '', True], 
                    '14-ts-t1-t2': ['13-t1', '', '15-t2', '', True],
                    '17-ts-t2-t3': ['15-t2', 'h2o', '19-t3', 'boh3', True],
                    '20-ts-t3-t4': ['19-t3', '', '21-t4', '', True],
                    'diffusion-t4-pdt': ['21-t4', '', f'{functionalization}naph-t', '00-lpdoh2', True],  
    }
    if type_name == "main":
        pass
    elif type_name == "xa":
        int_list += ['28-xa']
        ts_list += ['27-ts-t2-xa', '29-ts-xa-t3']
        xform_dict.update({'27-ts-t2-xa': ['15-t2', '', '28-xa', 'boh3', True],
                            '29-ts-xa-t3': ['28-xa', 'h2o', '19-t3', '', True]})
    elif type_name == "xb":
        int_list += ['31-xb']
        ts_list += ['30-ts-t2-xb', '32-ts-xb-t3']
        xform_dict.update({'30-ts-t2-xb': ['15-t2', '', '31-xb', 'boh3', True],
                            '32-ts-xb-t3': ['31-xb', 'h2o', '19-t3', '', True]})
    elif type_name == "xaxb":
        int_list += ['28-xa', '31-xb']
        ts_list += ['27-ts-t2-xa', '29-ts-xa-t3', '30-ts-t2-xb', '32-ts-xb-t3']
        xform_dict.update({'27-ts-t2-xa': ['15-t2', '', '28-xa', 'boh3', True],
                            '29-ts-xa-t3': ['28-xa', 'h2o', '19-t3', '', True],
                            '30-ts-t2-xb': ['15-t2', '', '31-xb', 'boh3', True],
                            '32-ts-xb-t3': ['31-xb', 'h2o', '19-t3', '', True]})
    elif type_name == "ya":
        int_list += ['34-ya']
        ts_list += ['33-ts-rxt-ya', '35-ts-ya-c1']
        xform_dict.update({'33-ts-rxt-ya': ['01-rxt', '', '34-ya', '', True],
                            '35-ts-ya-c1': ['34-ya', '', '03-c1', '', True]})
    elif type_name == "yb":
        int_list += ['37-yb']
        ts_list += ['36-ts-rxt-yb', '38-ts-yb-c1']
        xform_dict.update({'36-ts-rxt-yb': ['01-rxt', 'h2o', '37-yb', '', True],
                            '38-ts-yb-c1': ['37-yb', '', '03-c1', 'h2o', True]})
    elif type_name == "yayb":
        int_list += ['34-ya', '37-yb']
        ts_list += ['33-ts-rxt-ya', '35-ts-ya-c1', '36-ts-rxt-yb', '38-ts-yb-c1']
        xform_dict.update({'33-ts-rxt-ya': ['01-rxt', '', '34-ya', '', True],
                            '35-ts-ya-c1': ['34-ya', '', '03-c1', '', True],
                            '36-ts-rxt-yb': ['01-rxt', 'h2o', '37-yb', '', True],
                            '38-ts-yb-c1': ['37-yb', '', '03-c1', 'h2o', True]})
    elif type_name == "p1":
        int_list += ['42-p1']
        ts_list += ['40-ts-t2-p1', '44-ts-t3-p1']
        rxt_pdt_list += ['pre-catalyst-dimer', 'pre-catalyst-monomer', 'ligand']
        diffusion_ts_list += ['diffusion-pd2oh4-pdoh2', 'diffusion-pdoh2-l']
        xform_dict.update({'40-ts-t2-p1': ['15-t2', 'ligand', '42-p1', 'boh3', True],
                            '44-ts-t3-p1': ['19-t3', 'ligand', '42-p1', 'h2o', True],
                            'diffusion-pd2oh4-pdoh2': ['pre-catalyst-dimer', '', 'pre-catalyst-monomer', 'pre-catalyst-monomer', True],
                            'diffusion-pdoh2-l': ['pre-catalyst-monomer', 'ligand', '00-lpdoh2', '', True]})
    elif type_name == 'ob':
        int_list += ["16-t2-h2o", "18-t3-boh3", "59-t2-2h2o", "61-t3ob-2h2o", "62-t3ob-h2o", "63-t3ob", "65-t4ob", "67-t4ob-h2o", "69-t4-boh3", "70-t4ob-2h2o", "72-t4-boh3h2o",
                      "74-t5ob-rh", "75-t5ob", "76-t5ob-1h2o", "78-t6ob", "79-t5ob-2h2o", "81-t6ob-1h2o", "84-t3-boh3h2o"]
        ts_list += ["60-ts-t2-t3ob", "64-ts-t3ob-t4ob", "66-ts-t2-t4ob", "68-ts-t4ob-t4-4mps", "71-ts-t4ob-t4-6mps", "73-ts-t4ob-t5ob", 
                    "77-ts-t5ob-t6ob-4mps", "80-ts-t5ob-t6ob-6mps", "82-ts-t2-t3-4mps", "83-ts-t2-t3-6mps"]
        diffusion_ts_list += ["diffusion-15-16", "diffusion-18-19", "diffusion-16-300", "diffusion-61-303", "diffusion-62-304", "diffusion-65-308", "diffusion-69-21", 
                              "diffusion-67-311", "diffusion-72-310", "diffusion-69-21", "diffusion-74-316", "diffusion-75-317", "diffusion-76-320", "diffusion-78-322",
                              "diffusion-84-18", "diffusion-78-pdt"]
        xform_dict.update({"60-ts-t2-t3ob": ["59-t2-2h2o", '', "61-t3ob-2h2o", '', True],
                           "64-ts-t3ob-t4ob": ["63-t3ob", '', "65-t4ob", '', True],
                           "66-ts-t2-t4ob": ["15-t2", '', "65-t4ob", '', True],
                           "68-ts-t4ob-t4-4mps": ["67-t4ob-h2o", '', "69-t4-boh3", '', True],
                            "71-ts-t4ob-t4-6mps": ["70-t4ob-2h2o", '', "72-t4-boh3h2o", '', True],
                            "73-ts-t4ob-t5ob": ["65-t4ob", '', "74-t5ob-rh", '', True],
                            "77-ts-t5ob-t6ob-4mps": ["76-t5ob-1h2o", '', "78-t6ob", '', True],
                            "80-ts-t5ob-t6ob-6mps": ["79-t5ob-2h2o", '', "81-t6ob-1h2o", '', True],
                            "82-ts-t2-t3-4mps": ["16-t2-h2o", '', "18-t3-boh3", '', True],
                            "83-ts-t2-t3-6mps": ["59-t2-2h2o", '', "84-t3-boh3h2o", '', True],
                            "diffusion-15-16": ['15-t2', 'h2o', '16-t2-h2o', '', True],
                            "diffusion-18-19": ['18-t3-boh3', '', '19-t3', 'boh3', True],
                            "diffusion-16-300": ['16-t2-h2o', 'h2o', '59-t2-2h2o', '', True],
                            "diffusion-61-303": ['61-t3ob-2h2o', '', '62-t3ob-h2o', 'h2o', True],
                            "diffusion-62-304": ['62-t3ob-h2o', '', '63-t3ob', 'h2o', True],
                            "diffusion-65-308": ['65-t4ob', 'h2o', '67-t4ob-h2o', '', True],
                            "diffusion-69-21": ['69-t4-boh3', '', '21-t4', 'boh3', True],
                            "diffusion-67-311": ['67-t4ob-h2o', 'h2o', '70-t4ob-2h2o', '', True],
                            "diffusion-72-310": ['72-t4-boh3h2o', '', '69-t4-boh3', 'h2o', True],
                            "diffusion-69-21": ['69-t4-boh3', '', '21-t4', 'boh3', True],
                            "diffusion-74-316": ['74-t5ob-rh', '', '75-t5ob', 'naph-t', True],
                            "diffusion-75-317": ['75-t5ob', 'h2o', '76-t5ob-1h2o', '', True],
                            "diffusion-76-320": ['76-t5ob-1h2o', 'h2o', '79-t5ob-2h2o', '', True],
                            "diffusion-78-322": ['78-t6ob', 'h2o', '81-t6ob-1h2o', '', True],
                            "diffusion-84-18": ['84-t3-boh3h2o', '', '18-t3-boh3', 'h2o', True],
                            "diffusion-78-pdt": ['78-t6ob', '', '00-lpdoh2', 'boh3', True]
                            })


    elif type_name == 'hc':
        int_list += ["85-c2dim", "86-c2-naphboh2", "88-hc1-boh3", "89-hc1", "91-hc2", "93-hc3", "95-lpd0boh3", "96-hc4", "98-lpd0"]
        ts_list += ["87-ts-c2-naphboh2-hc1", "90-ts-hc1-hc2", "92-ts-hc2-hc3", "94-ts-hc3-pd0boh3", "97-ts-hc4-lpd0"]
        rxt_pdt_list += ['binaph']
        diffusion_ts_list += ['diffusion-05-70', 'diffusion-05-71', "diffusion-73-72", 'diffusion-78-81', 'diffusion-80-83']
        xform_dict.update({"87-ts-c2-naphboh2-hc1": ["86-c2-naphboh2", '', "88-hc1-boh3", '', True],
                            "90-ts-hc1-hc2": ["89-hc1", '', "91-hc2", '', True],
                            "92-ts-hc2-hc3": ["91-hc2", '', "93-hc3", '', True],
                            "94-ts-hc3-pd0boh3": ["93-hc3", '', "95-lpd0boh3", 'binaph', True],
                            "97-ts-hc4-lpd0": ["96-hc4", '', "98-lpd0", 'binaph', True],
                            'diffusion-05-70': ['05-c2', '', '85-c2dim', 'boh3', True],
                            'diffusion-05-71': ['05-c2', 'naphboh2', '86-c2-naphboh2', '', True],
                            "diffusion-73-72": ['88-hc1-boh3', '', '89-hc1', 'boh3', True],
                            'diffusion-78-81': ['93-hc3', '', '96-hc4', 'boh3', True],
                            'diffusion-80-83': ['95-lpd0boh3', '', '98-lpd0', 'boh3', True]
                            })
    elif type_name == "ester":
        int_list = ['e00-lpdoh2', 'e01-ecrxt', 'e03-ec1', 'e05-ec2', 'e09-ec3', 'e11-ec4', 'e12-etrxt', 'e14-et1', 'e16-et2', 'e20-et3', 'e22-et4', 'e100-l2pd2oh4']
        ts_list = ['e02-ts-ecrxt-ec1', 'e04-ts-ec1-ec2', 'e07-ts-ec2-ec3', 'e10-ts-ec3-ec4', 'e13-ts-etrxt-et1', 'e15-ts-et1-et2', 'e18-ts-et2-et3', 'e21-ts-et3-et4']
        rxt_pdt_list = [f'{functionalization}naphbpin', 'h2o', 'bpinoh', f'{functionalization}naph-c', f'{functionalization}naph-t']
        diffusion_ts_list = ['diffusion-l2pd2oh4-lpdoh2', 'diffusion-lpdoh2-ecrxt', 'diffusion-lpdoh2-etrxt', 'diffusion-ec4-pdt', 'diffusion-et4-pdt']
        xform_dict = {'diffusion-l2pd2oh4-lpdoh2': ['e100-l2pd2oh4', '', 'e00-lpdoh2', 'e00-lpdoh2', True],
                      'diffusion-lpdoh2-ecrxt': ['e00-lpdoh2', f'{functionalization}naphbpin', 'e01-ecrxt', '', True],
                      'diffusion-lpdoh2-etrxt': ['e00-lpdoh2', f'{functionalization}naphbpin', 'e12-etrxt', '', True],
                        'e02-ts-ecrxt-ec1': ['e01-ecrxt', '', 'e03-ec1', '', True],
                        'e04-ts-ec1-ec2': ['e03-ec1', '', 'e05-ec2', '', True],
                        'e07-ts-ec2-ec3': ['e05-ec2', 'h2o', 'e09-ec3', 'bpinoh', True],
                        'e10-ts-ec3-ec4': ['e09-ec3', '', 'e11-ec4', '', True],
                        'diffusion-ec4-pdt': ['e11-ec4', '', f'{functionalization}naph-c', 'e00-lpdoh2', True],
                        'e12-etrxt': ['e00-lpdoh2', f'{functionalization}naphbpin', 'e12-etrxt', '', True],
                        'e13-ts-etrxt-et1': ['e12-etrxt', '', 'e14-et1', '', True],
                        'e15-ts-et1-et2': ['e14-et1', '', 'e16-et2', '', True],
                        'e18-ts-et2-et3': ['e16-et2', 'h2o', 'e20-et3', 'bpinoh', True],
                        'e21-ts-et3-et4': ['e20-et3', '', 'e22-et4', '', True],
                        'diffusion-et4-pdt': ['e22-et4', '', f'{functionalization}naph-t', 'e00-lpdoh2', True],
        }
                        
                        
    else:
        print("Invalid alternative mechanism")
    
    return int_list, ts_list, rxt_pdt_list, diffusion_ts_list, xform_dict


def get_left_right_barrier_reference_energies(df, xform: List[str]):
    if xform[1] == '':
        energy_l = float(get_energy(df, xform[0]))
    else:
        energy_l = float(get_energy(df, xform[0])) + float(get_energy(df, xform[1]))
    if xform[3] == '':
        energy_r = float(get_energy(df, xform[2]))
    else:
        energy_r = float(get_energy(df, xform[2])) + float(get_energy(df, xform[3]))
        
    return energy_l, energy_r


nice_main_names = {
    "00-lpdoh2": "[LPd(OH)$_2$]$_2$",
    "01-rxt": "RXT",
    "02-ts-rxt-c1": "RXT-C1",
    "03-c1": "C1",
    "04-ts-c1-c2": "C1-C2",
    "05-c2": "C2",
    "06-c2-h2o": "C2 + H$_2$O", 
    "07-ts-c2-c3": "C2-C3",
    "08-c3-boh3": "C3 + B(OH)$_3$", 
    "09-c3": "C3",
    "10-ts-c3-c4": "C3-C4",
    "11-c4": "C4",
    "12-ts-rxt-t1": "RXT-T1",
    "13-t1": "T1",
    "14-ts-t1-t2": "T1-T2",
    "15-t2": "T2",
    "16-t2-h2o": "T2 + H$_2$O", 
    "17-ts-t2-t3": "T2-T3",
    "18-t3-boh3": "T3 + B(OH)$_3$", 
    "19-t3": "T3",
    "20-ts-t3-t4": "T3-T4",
    "21-t4": "T4",
}

nice_altpdb_names = {
    "22-c5": "C5",
    "23-ts-c5-c6": "C5-C6",
    "24-t5": "T5",
    "25-ts-t5-t6": "T5-T6",
    "26-t6": "T6",
}

nice_xaxb_names = {
    "27-ts-t2-xa": "T2-XA",
    "28-xa": "XA",
    "29-ts-xa-t3": "XA-T3",
    "30-ts-t2-xb": "T2-XB",
    "31-xb": "XB",
    "32-ts-xb-t3": "XB-T3",
}

nice_yayb_names = {
    "33-ts-rxt-ya": "RXT-YA",
    "34-ya": "YA",
    "35-ts-ya-c1": "YA-C1",
    "36-ts-rxt-yb": "RXT-YB",
    "37-yb": "YB",
    "38-ts-yb-c1": "YB-C1",
}

nice_p1_names = {
    "39-t2-lig": "T2 + L",
    "40-ts-t2-p1": "T2-P1",
    "41-p1-boh3": "P1 + B(OH)$_3$",
    "42-p1": "P1",
    "43-t3-lig": "T3 + L",
    "44-ts-t3-p1": "T3-P1",
    "45-p1-h2o": "P1 + H$_2$O",
}

nice_alternative_names = {
    "46-ub-ref": "$\mu$B-Ref",
    "47-oacu-ref": "OAc\$mu$-Ref",
    "48-c-ref": "C-Ref",
    "49-t-ref": "T-Ref",
    "50-cl-ref": "CL-Ref",
    "51-tl-ref": "TL-Ref",
    "52-cs-ref": "CS-Ref",
    "53-ts-ref": "TS-Ref",
    "54-cw-ref": "CW-Ref",
    "55-tw-ref": "TW-Ref",
    "56-oh-ref": "OH-Ref",
    "57-koh-ref": "KOH-Ref",
    "58-c2alt": "C2$_{alt}$",
    "85-c2dim": "C2$_{dimer}$",
}

nice_ob_names = {
    "59-t2-2h2o": "T2 + 2H$_2$O",
    "60-ts-t2-t3ob": "T2-T3OB",
    "61-t3ob-2h2o": "T3OB + 2H$_2$O",
    "62-t3ob-h2o": "T3OB + H$_2$O",
    "63-t3ob": "T3OB",
    "64-ts-t3ob-t4ob": "T3OB-T4OB",
    "65-t4ob": "T4OB",
    "66-ts-t2-t4ob": "T2-T4OB",
    "67-t4ob-h2o": "T4OB + H$_2$O",
    "68-ts-t4ob-t4-4mps": "T4OB-T4-4MPS",
    "69-t4-boh3": "T4 + B(OH)$_3$",
    "70-t4ob-2h2o": "T4OB + 2H$_2$O",
    "71-ts-t4ob-t4-6mps": "T4OB-T4-6MPS",
    "72-t4-boh3h2o": "T4 + B(OH)$_3$ + H$_2$O",
    "73-ts-t4ob-t5ob": "T4OB-T5OB",
    "74-t5ob-rh": "T5OB + R-H",
    "75-t5ob": "T5OB",
    "76-t5ob-1h2o": "T5OB + H$_2$O",
    "77-ts-t5ob-t6ob-4mps": "T5OB-T6OB-4MPS",
    "78-t6ob": "T6OB",
    "79-t5ob-2h2o": "T5OB + 2H$_2$O",
    "80-ts-t5ob-t6ob-6mps": "T5OB-T6OB-6MPS",
    "81-t6ob-1h2o": "T6OB + H$_2$O",
    "82-ts-t2-t3-4mps": "T2-T3-4MPS",
    "83-ts-t2-t3-6mps": "T2-T3-6MPS",
    "84-t3-boh3h2o": "T3 + B(OH)$_3$ + H$_2$O",
}

nice_hc_names = {
    "85-c2dim": "C2$_{dimer}$",
    "86-c2-naphboh2": "C2 + NaphB(OH)$_2$",
    "87-ts-c2-naphboh2-hc1": "C2 + NaphB(OH)$_2$-HC1",
    "88-hc1-boh3": "HC1 + B(OH)$_3$",
    "89-hc1": "HC1",
    "90-ts-hc1-hc2": "HC1-HC2",
    "91-hc2": "HC2",
    "92-ts-hc2-hc3": "HC2-HC3",
    "93-hc3": "HC3",
    "94-ts-hc3-pd0boh3": "HC3-LPd(0)B(OH)$_3$",
    "95-lpd0boh3": "LPd(0)B(OH)$_3$",
    "96-hc4": "HC4 + B(OH)$_3$",
    "97-ts-hc4-lpd0": "HC4-LPd(0)",
    "98-lpd0": "LPd(0) + B(OH)$_3$"
}

nice_unlig_names = {
    "00-murxt": "$\mu$-RXT",
    "01-rxt": "RXT",
    "02-ts-rxt-int1": "RXT-INT1",
    "03-int1": "INT1",
    "04-ts-int1-int2": "INT1-INT2",
    "05-int2": "INT2",
    "06-int2-h2o": "INT2 + H$_2$O", 
    "07-ts-int2-int3": "INT2-INT3",
    "08-int3-boh3": "INT3 + B(OH)$_3$", 
    "09-int3": "INT3",
    "10-ts-int3-int4": "INT3-INT4",
    "11-int4": "INT4",
    "12-ts-int2-int5c": "INT2-INT5C",
    "13-int5c": "INT5C",
    "14-ts-int5c-int6c": "INT5C-INT6C",
    "15-int6c": "INT6C",
    "16-int6c-h2o": "INT6C + H$_2$O",
    "17-ts-int6c-int7c": "INT6C-INT7C",
    "18-int7c-boh3": "INT7C + B(OH)$_3$",
    "19-int7c": "INT7C",
    "20-ts-int7c-int8c": "INT7C-INT8C",
    "21-int8c": "INT8C",
    "22-ts-int2-int5t": "INT2-INT5T",
    "23-int5t": "INT5T",
    "24-ts-int5t-int6t": "INT5T-INT6T",
    "25-int6t": "INT6T",
    "26-int6t-h2o": "INT6T + H$_2$O",
    "27-ts-int6t-int7t": "INT6T-INT7T",
    "28-int7t-boh3": "INT7T + B(OH)$_3$",
    "29-int7t": "INT7T",
    "30-ts-int7t-int8t": "INT7T-INT8T",
    "31-int8t": "INT8T",
    "32-rmurxt": "R$\mu$-RXT",
    "33-ts-int6c-re": "INT6C-RE",
}

nice_ester_names = {
    "e00-lpdoh2": "[LPd(OH)$_2$]$_2$",
    "e01-ecrxt": "ECRXT",
    "e02-ts-ecrxt-ec1": "ECRXT-EC1",
    "e03-ec1": "EC1",
    "e04-ts-ec1-ec2": "EC1-EC2",
    "e05-ec2": "EC2",
    "e06-ec2-h2o": "EC2 + H$_2$O", 
    "e07-ts-ec2-ec3": "EC2-EC3",
    "e08-ec3-bpinoh": "EC3 + BpinOH", 
    "e09-ec3": "EC3", 
    "e10-ts-ec3-ec4": "EC3-EC4",
    "e11-ec4": "EC4",
    "e12-etrxt": "ETRXT",
    "e13-ts-etrxt-et1": "ETRXT-ET1",
    "e14-et1": "ET1",
    "e15-ts-et1-et2": "ET1-ET2",
    "e16-et2": "ET2",
    "e17-et2-h2o": "ET2 + H$_2$O", 
    "e18-ts-et2-et3": "ET2-ET3",
    "e19-et3-bpinoh": "ET3 + BpinOH", 
    "e20-et3": "ET3",
    "e21-ts-et3-et4": "ET3-ET4",
    "e22-et4": "ET4",
    "e23-ts-etrxt-ec1b": "ETRXT-EC1B",
    "e24-ec1b": "EC1B",
    "e25-ts-ec1b-ec2b": "EC1B-EC2B",
    "e26-ec2b": "EC2B",
    "e27-ec2b-h2o": "EC2B + H$_2$O",
    "e28-ts-ec2b-ec3b": "EC2B-EC3B",
    "e29-ec3b-bpinoh": "EC3B + BpinOH",
    "e30-ts-ecrxt-et1b": "ECRXT-ET1B",
    "e31-et1b": "ET1B",
    "e32-ts-et1b-et2b": "ET1B-ET2B",
    "e33-et2b": "ET2B",
    "e34-et2b-h2o": "ET2B + H$_2$O",
    "e35-ts-et2b-et3b": "ET2B-ET3B",
    "e36-et3b-bpinoh": "ET3B + BpinOH",
}

functionalizations = ['6ome', '6cn']
nice_functionalized_names = {}
for funct in functionalizations:
    for key, val in nice_main_names.items():
        nice_functionalized_names[f"{funct}{key}"] = val
    for key, val in nice_ester_names.items():
        nice_functionalized_names[f"{funct}{key}"] = val
        

nice_structure_names = {**nice_main_names, **nice_altpdb_names, **nice_xaxb_names, **nice_yayb_names, **nice_p1_names, 
                        **nice_alternative_names, **nice_ob_names, **nice_hc_names, **nice_unlig_names, **nice_ester_names, 
                        **nice_functionalized_names}

proper_names_dict = {'3z-adjohnphos': "AdJohnPhos",
 '3aa-adbrettphos': "AdBrettPhos",
 '3s-brettphos': "BrettPhos",
 '3m-cyjohnphos': "CyJohnPhos",
 '3n-cymephos': "CyMePhos",
 '3o-davephos': "DavePhos",
 '3j-iprjohnphos': "(i-Pr)JohnPhos",
 '3y-me4tbuxphos': "Me$_4$tBuXPhos",
 '3i-pad3': "P(Ad)$_3$",
 '3d-pcpt3': "P(Cpt)$_3$",
 '3f-pcy2tbu': "P(Cy)$_2$(t-Bu)",
 '3e-pcy3': "P(Cy)$_3$",
 '3k-phjohnphos': "PhJohnPhos",
 '3l-phdavephos': "PhDavePhos",
 '3c-pipr3': "P(i-Pr)$_3$",
 '3b-potol3': "P(o-tol)$_3$",
 '3a-pph3': "P(Ph)$_3$",
 '3g-ptbu2cy': "P(t-Bu)$_2$(Cy)",
 '3h-ptbu3': "P(t-Bu)$_3$",
 '3q-ruphos': "RuPhos",
 '3p-sphos': "SPhos",
 '3t-tbujohnphos': "JohnPhos",
 '3x-tbubrettphos': "tBuBrettPhos",
 '3v-tbudavephos': "tBuDavePhos",
 '3u-tbumephos': "tBuMePhos",
 '3w-tbuxphos': "tBuXPhos",
 '3r-xphos': "XPhos",   
 '9c-etjohnphos': "EtJohnPhos",
 '9d-mejohnphos': "MeJohnPhos",
 '9a-cbujohnphos': 'CbuJohnPhos',
 '9b-cptjohnphos': 'CptJohnPhos',
 '9e-pcbu3': 'P(Cbu)$_3$',     
 '9f-pcpr3': 'P(Cpr)$_3$',
 '9h-pme3': 'P(Me)$_3$',
 '9g-pet3': 'P(Et)$_3$',
}

structure_dict = {'main': list(proper_names_dict.keys()),
                  'altpdb': ['3z-adjohnphos', '3aa-adbrettphos', '3s-brettphos', '3m-cyjohnphos', '3n-cymephos', '3o-davephos',
                             '3j-iprjohnphos', '3y-me4tbuxphos', '3i-pad3', '3d-pcpt3', '3f-pcy2tbu', '3e-pcy3', '3k-phjohnphos',
                             '3l-phdavephos', '3c-pipr3', '3b-potol3', '3a-pph3', '3g-ptbu2cy', '3h-ptbu3', '3q-ruphos',
                            '3p-sphos', '3t-tbujohnphos', '3x-tbubrettphos', '3v-tbudavephos', '3u-tbumephos', '3w-tbuxphos',
                            '3r-xphos',],
                  'xa': ['3w-tbuxphos', '3x-tbubrettphos', '3y-me4tbuxphos'],
                  'xb': ['3w-tbuxphos', '3x-tbubrettphos', '3y-me4tbuxphos'],          
                  'xaxb': ['3w-tbuxphos', '3x-tbubrettphos', '3y-me4tbuxphos'],
                  'ya': ['3s-brettphos', '3m-cyjohnphos', '3r-xphos', '3j-iprjohnphos', '3aa-adbrettphos', '3w-tbuxphos'],
                  'yb': ['3s-brettphos', '3m-cyjohnphos', '3r-xphos', '3j-iprjohnphos', '3aa-adbrettphos', '3w-tbuxphos'],
                  'yayb': ['3s-brettphos', '3m-cyjohnphos', '3r-xphos', '3j-iprjohnphos', '3aa-adbrettphos', '3w-tbuxphos'],
                  'p1': ['9h-pme3', '9g-pet3', '3c-pipr3', '9f-pcpr3', '9e-pcbu3', '3d-pcpt3', '3e-pcy3', '3a-pph3', '3m-cyjohnphos',
                          '3j-iprjohnphos', '9a-cbujohnphos', '9b-cptjohnphos', '9c-etjohnphos', '9d-mejohnphos', '3t-tbujohnphos'],
                  'alternative': ['3e-pcy3', '3h-ptbu3', '3m-cyjohnphos', '3t-tbujohnphos'],
                  "ob": ['3w-tbuxphos', '3x-tbubrettphos', '3y-me4tbuxphos'],
                  "hc": ['3e-pcy3', '3h-ptbu3'],
                  'pd3oac6': ['3e-pcy3', '3h-ptbu3', '3m-cyjohnphos', '3t-tbujohnphos'],
                  "ester": ['3e-pcy3', '3h-ptbu3', '3j-iprjohnphos', '3m-cyjohnphos', '3t-tbujohnphos'],
                  "functionalized": ['3e-pcy3', '3h-ptbu3', '3m-cyjohnphos', '3t-tbujohnphos']}

## Processing calculation results

In [3]:
df = pd.read_csv("Goodvibes_output.csv", skiprows=23)
df = df.tail(-1)
df = df.head(-1)
df = df[['   Structure', 'qh-G(T)_SPC', 'im']]
df.rename(columns={'   Structure': 'filename'}, inplace=True)
df = pd.concat([df, pd.DataFrame([['o  3aa-adbrettphos-35-ts-ya-c1',-3004.289383,-1000,]], columns=df.columns)]) # spoof missing data
df = df.reset_index(drop=True)

df['filename'] = df.apply(lambda row: row['filename'].split()[1], axis=1)
df[['group', 'structure']] = df.apply(lambda row: split_filename(row['filename']), axis=1, result_type='expand')

In [4]:
base_reactants_df = df[df['group'] == "base-reactants"].copy()
base_catalysis_df = df[df['group'] == "base-catalysis"].copy()
main_df = df[((~df['group'].isin(['base-catalysis', 'base-reactants', "unlig-pdoh2"])) & (df['structure'] != '99-lig') & (~df['structure'].str.contains('ref')))].copy()
ligands_df = df[df['structure'] == '99-lig'].copy()
unlig_df =  df[df['group'] == "unlig-pdoh2"].copy()
# ester_df is if structure name starts with "e" 
ester_df = df[df['structure'].str.startswith('e')].copy()
# first letter 6, second letter is alphanumeric
functionalized_df = df[df['structure'].str.match(r'^6[a-zA-Z]')].copy()
# drop base reactants from functionalized_df
functionalized_df = functionalized_df[~functionalized_df['structure'].isin(base_reactants_df['structure'])].copy()
same_ester_structures = {'00-lpdoh2': "e00-lpdoh2", 
                         '09-c3': "e09-ec3",
                          '10-ts-c3-c4': "e10-ts-ec3-ec4", 
                          '11-c4': "e11-ec4", 
                          '19-t3': "e20-et3", 
                          '20-ts-t3-t4': "e21-ts-et3-et4", 
                          '21-t4': 'e22-et4',}
#  for unique elements in ester_df['group'], add df rows into ester_df if the structure exists in same_ester_structures.keys()
for group in ester_df['group'].unique():
    slice_df = df[df['group'] == group]
    for structure in slice_df['structure']:
        if structure in same_ester_structures.keys():
            structure_df = slice_df[slice_df['structure'] == structure].copy()
            # change the structure name to the corresponding ester structure name
            structure_df['structure'] = same_ester_structures[structure]
            ester_df = pd.concat([ester_df, structure_df], ignore_index=True)
    
# sort ester_df by group then structure
ester_df = ester_df.sort_values(by=['group', 'structure'])

# drop structures in ester_df from main_df
main_df = main_df[~main_df['structure'].isin(ester_df['structure'])].copy()
main_df = main_df[~main_df['structure'].isin(functionalized_df['structure'])].copy()

In [5]:
main_df['equalized_gibbs'] = main_df.apply(lambda row: equalize_reference(row, row['group']), axis=1)
main_df['reference_gibbs'] = main_df.apply(lambda row: (row['equalized_gibbs'] - get_reference_energy(main_df, row['group']))*2625.5/4.184, axis=1)

In [6]:
ester_df['equalized_gibbs'] = ester_df.apply(lambda row: equalize_reference(row, row['group']), axis=1)
ester_df['reference_gibbs'] = ester_df.apply(lambda row: (row['equalized_gibbs'] - get_ester_reference_energy(ester_df, row['group']))*2625.5/4.184, axis=1)

In [7]:
def pull_functionalization(row):
    if row['structure'].startswith('6cn'):
        return row['structure'][:3]
    elif row['structure'].startswith('6ome'):
        return row['structure'][:4]
    else:
        print("Functionalization does not exist")
        
functionalized_df['functionalization'] = functionalized_df.apply(lambda row: pull_functionalization(row), axis=1)
functionalized_df['structure'] = functionalized_df.apply(lambda row: row['structure'].replace(row['functionalization'], ''), axis=1)
# add structure == 00-lpdoh2 to functionalized_df for the unique groups in functionalized_df
for group in functionalized_df['group'].unique():
    if group not in functionalized_df['group'].unique():
        functionalized_df = pd.concat([functionalized_df, df[df['group'] == group][df['structure'] == '00-lpdoh2']], ignore_index=True)
        
for group in functionalized_df['group'].unique():
    tmp_df = df[df['group'] == group][df['structure'] == '00-lpdoh2']
    for funct in functionalized_df['functionalization'].unique():
        tmp_df['functionalization'] = funct
        functionalized_df = pd.concat([functionalized_df, tmp_df], ignore_index=True)
        
for group in functionalized_df['group'].unique():
    for funct in functionalized_df['functionalization'].unique():
        slice_df = functionalized_df[(functionalized_df['group'] == group) & (functionalized_df['functionalization'] == funct)]
        for structure in slice_df['structure']:
            if structure in same_ester_structures.keys():
                structure_df = slice_df[(slice_df['structure'] == structure)].copy()
                # change the structure name to the corresponding ester structure name
                structure_df['structure'] = same_ester_structures[structure]
                functionalized_df = pd.concat([functionalized_df, structure_df], ignore_index=True)
            
functionalized_df = functionalized_df.sort_values(by=['group', 'structure']).reset_index(drop=True)

  tmp_df = df[df['group'] == group][df['structure'] == '00-lpdoh2']
  tmp_df = df[df['group'] == group][df['structure'] == '00-lpdoh2']
  tmp_df = df[df['group'] == group][df['structure'] == '00-lpdoh2']
  tmp_df = df[df['group'] == group][df['structure'] == '00-lpdoh2']


In [8]:
functionalized_ester_df = functionalized_df[functionalized_df['structure'].str.startswith('e')].copy()
functionalized_ester_df['equalized_gibbs'] = functionalized_ester_df.apply(lambda row: equalize_reference(row, row['group'], row['functionalization']), axis=1)
functionalized_ester_df['reference_gibbs'] = functionalized_ester_df.apply(lambda row: (row['equalized_gibbs'] - get_ester_reference_energy(functionalized_ester_df, row['group'], row['functionalization']))*2625.5/4.184, axis=1)
functionalized_acid_df = functionalized_df[~functionalized_df['structure'].str.startswith('e')].copy()
functionalized_acid_df['equalized_gibbs'] = functionalized_acid_df.apply(lambda row: equalize_reference(row, row['group'], row['functionalization']), axis=1)
functionalized_acid_df['reference_gibbs'] = functionalized_acid_df.apply(lambda row: (row['equalized_gibbs'] - get_reference_energy(functionalized_acid_df, row['group'], row['functionalization']))*2625.5/4.184, axis=1)


## Create microkinetic pathways

In [9]:
diff_barrier = 3.4663630747609946
type_list = ['main', 'xa', 'xb', 'xaxb', 'ya', 'yb', 'yayb', 'p1', 'ob', 'hc']

for type_name in type_list:
    int_list, ts_list, rxt_pdt_list, diffusion_ts_list, xform_dict = get_main_microkinetics_transformations(type_name=type_name)

    for ligand in structure_dict[type_name]:
        os.makedirs(f"microkinetics/{type_name}-{ligand}", exist_ok=True)
        print(f"{type_name}-{ligand}")
        current_df = main_df[main_df['group'] == ligand][['structure', 'qh-G(T)_SPC']].copy()
        current_df = pd.concat([current_df, pd.DataFrame([{'structure': '100-l2pd2oh4', 'qh-G(T)_SPC': current_df[current_df['structure'] == '00-lpdoh2']['qh-G(T)_SPC'].values[0]}])], ignore_index=True)
        current_df.loc[current_df['structure'] == '00-lpdoh2', 'qh-G(T)_SPC'] /= 2

        if type_name == 'hc':
            current_df.loc[current_df['structure'] == '85-c2dim', 'qh-G(T)_SPC'] /= 2
        int_df = current_df[(~current_df['structure'].str.contains('ts')) & (current_df['structure'].isin(int_list))].copy()
        
        for rxt in rxt_pdt_list:
            if rxt == "ligand":
                int_df = pd.concat([int_df, pd.DataFrame([{'structure': rxt, 'qh-G(T)_SPC': get_ligand_energy(ligands_df, ligand)}])], ignore_index=True)
            elif rxt == "pre-catalyst-dimer":
                int_df = pd.concat([int_df, pd.DataFrame([{'structure': rxt, 'qh-G(T)_SPC': get_precatalyst_energy(current_df, ligand)}])], ignore_index=True)
            elif rxt == "pre-catalyst-monomer":
                int_df = pd.concat([int_df, pd.DataFrame([{'structure': rxt, 'qh-G(T)_SPC': get_precatalyst_energy(current_df, ligand)/2}])], ignore_index=True)
            else:
                int_df = pd.concat([int_df, pd.DataFrame([{'structure': rxt, 'qh-G(T)_SPC': get_reactant_energy(rxt)}])], ignore_index=True)
        

        int_df.to_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-intermediates.csv", index=False)

        xforms = []
        for ts, xform in xform_dict.items():
            energy_l, energy_r = get_left_right_barrier_reference_energies(int_df, xform)
            if ts in diffusion_ts_list:
                ts_energy = max(energy_l, energy_r)
            else:
                ts_energy = get_energy(current_df, ts)

            xforms.append({'I1': xform[0], 'I2': xform[1], 'P1': xform[2], 'P2': xform[3], 
                            'Energy': ts_energy, 'Backwards': xform[4], 'Flag': ts, 
                            'Ref_L': (ts_energy-energy_l)*2625.5/4.184, 
                            'Ref_R': (ts_energy-energy_r)*2625.5/4.184,}
                            )
        
        xform_df = pd.DataFrame(xforms, columns=['I1', 'I2', 'P1', 'P2', 'Energy', 'Backwards', 'Flag', 'Ref_L', 'Ref_R'])
        xform_df["Backwards"] = xform_df["Backwards"].astype(bool)    

        xform_df.to_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-transformations.csv", index=False)    


main-3z-adjohnphos
main-3aa-adbrettphos
main-3s-brettphos
main-3m-cyjohnphos
main-3n-cymephos
main-3o-davephos
main-3j-iprjohnphos
main-3y-me4tbuxphos
main-3i-pad3
main-3d-pcpt3
main-3f-pcy2tbu
main-3e-pcy3
main-3k-phjohnphos
main-3l-phdavephos
main-3c-pipr3
main-3b-potol3
main-3a-pph3
main-3g-ptbu2cy
main-3h-ptbu3
main-3q-ruphos
main-3p-sphos
main-3t-tbujohnphos
main-3x-tbubrettphos
main-3v-tbudavephos
main-3u-tbumephos
main-3w-tbuxphos
main-3r-xphos
main-9c-etjohnphos
main-9d-mejohnphos
main-9a-cbujohnphos
main-9b-cptjohnphos
main-9e-pcbu3
main-9f-pcpr3
main-9h-pme3
main-9g-pet3
xa-3w-tbuxphos
xa-3x-tbubrettphos
xa-3y-me4tbuxphos
xb-3w-tbuxphos
xb-3x-tbubrettphos
xb-3y-me4tbuxphos
xaxb-3w-tbuxphos
xaxb-3x-tbubrettphos
xaxb-3y-me4tbuxphos
ya-3s-brettphos
ya-3m-cyjohnphos
ya-3r-xphos
ya-3j-iprjohnphos
ya-3aa-adbrettphos
ya-3w-tbuxphos
yb-3s-brettphos
yb-3m-cyjohnphos
yb-3r-xphos
yb-3j-iprjohnphos
yb-3aa-adbrettphos
yb-3w-tbuxphos
yayb-3s-brettphos
yayb-3m-cyjohnphos
yayb-3r-xphos
yayb-

In [10]:
diff_barrier = 3.4663630747609946

for type_name in ['ester']:
    int_list, ts_list, rxt_pdt_list, diffusion_ts_list, xform_dict = get_main_microkinetics_transformations(type_name=type_name)

    for ligand in structure_dict[type_name]:
        os.makedirs(f"microkinetics/{type_name}-{ligand}", exist_ok=True)
        print(f"{type_name}-{ligand}")
        current_df = ester_df[ester_df['group'] == ligand][['structure', 'qh-G(T)_SPC']].copy()
        current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'e100-l2pd2oh4', 'qh-G(T)_SPC': current_df[current_df['structure'] == 'e00-lpdoh2']['qh-G(T)_SPC'].values[0]}])], 
                               ignore_index=True)
        current_df.loc[current_df['structure'] == 'e00-lpdoh2', 'qh-G(T)_SPC'] /= 2

        int_df = current_df[(~current_df['structure'].str.contains('ts')) & (current_df['structure'].isin(int_list))].copy()
        
        for rxt in rxt_pdt_list:
            if rxt == "ligand":
                int_df = pd.concat([int_df, pd.DataFrame([{'structure': rxt, 'qh-G(T)_SPC': get_ligand_energy(ligands_df, ligand)}])], ignore_index=True)
            elif rxt == "pre-catalyst-dimer":
                int_df = pd.concat([int_df, pd.DataFrame([{'structure': rxt, 'qh-G(T)_SPC': get_precatalyst_energy(current_df, ligand)}])], ignore_index=True)
            elif rxt == "pre-catalyst-monomer":
                int_df = pd.concat([int_df, pd.DataFrame([{'structure': rxt, 'qh-G(T)_SPC': get_precatalyst_energy(current_df, ligand)/2}])], ignore_index=True)
            else:
                int_df = pd.concat([int_df, pd.DataFrame([{'structure': rxt, 'qh-G(T)_SPC': get_reactant_energy(rxt)}])], ignore_index=True)
        

        int_df.to_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-intermediates.csv", index=False)

        xforms = []
        for ts, xform in xform_dict.items():
            energy_l, energy_r = get_left_right_barrier_reference_energies(int_df, xform)
            if ts in diffusion_ts_list:
                ts_energy = max(energy_l, energy_r)
            else:
                ts_energy = get_energy(current_df, ts)

            xforms.append({'I1': xform[0], 'I2': xform[1], 'P1': xform[2], 'P2': xform[3], 
                            'Energy': ts_energy, 'Backwards': xform[4], 'Flag': ts, 
                            'Ref_L': (ts_energy-energy_l)*2625.5/4.184, 
                            'Ref_R': (ts_energy-energy_r)*2625.5/4.184,}
                            )
        
        xform_df = pd.DataFrame(xforms, columns=['I1', 'I2', 'P1', 'P2', 'Energy', 'Backwards', 'Flag', 'Ref_L', 'Ref_R'])
        xform_df["Backwards"] = xform_df["Backwards"].astype(bool)    

        xform_df.to_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-transformations.csv", index=False)    


ester-3e-pcy3
ester-3h-ptbu3
ester-3j-iprjohnphos
ester-3m-cyjohnphos
ester-3t-tbujohnphos


In [11]:
diff_barrier = 3.4663630747609946

for type_name in ['main']:
    for functionalization in functionalizations:
        int_list, ts_list, rxt_pdt_list, diffusion_ts_list, xform_dict = get_main_microkinetics_transformations(type_name=type_name, functionalization=functionalization)

        for ligand in structure_dict['functionalized']:
            folder_name = f"{type_name}-{functionalization}{ligand}"
            os.makedirs(f"microkinetics/{folder_name}", exist_ok=True)
            print(f"{folder_name}")
            current_df = functionalized_acid_df[(functionalized_acid_df['group'] == ligand) & (functionalized_acid_df['functionalization'] == functionalization)][['structure', 'qh-G(T)_SPC']].copy()
            current_df = pd.concat([current_df, pd.DataFrame([{'structure': '100-l2pd2oh4', 'qh-G(T)_SPC': current_df[current_df['structure'] == '00-lpdoh2']['qh-G(T)_SPC'].values[0]}])], 
                                ignore_index=True)
            current_df.loc[current_df['structure'] == '00-lpdoh2', 'qh-G(T)_SPC'] /= 2

            int_df = current_df[(~current_df['structure'].str.contains('ts')) & (current_df['structure'].isin(int_list))].copy()
            
            for rxt in rxt_pdt_list:
                if rxt == "ligand":
                    int_df = pd.concat([int_df, pd.DataFrame([{'structure': rxt, 'qh-G(T)_SPC': get_ligand_energy(ligands_df, ligand)}])], ignore_index=True)
                elif rxt == "pre-catalyst-dimer":
                    int_df = pd.concat([int_df, pd.DataFrame([{'structure': rxt, 'qh-G(T)_SPC': get_precatalyst_energy(current_df, ligand)}])], ignore_index=True)
                elif rxt == "pre-catalyst-monomer":
                    int_df = pd.concat([int_df, pd.DataFrame([{'structure': rxt, 'qh-G(T)_SPC': get_precatalyst_energy(current_df, ligand)/2}])], ignore_index=True)
                else:
                    int_df = pd.concat([int_df, pd.DataFrame([{'structure': rxt, 'qh-G(T)_SPC': get_reactant_energy(rxt, functionalization=functionalization)}])], ignore_index=True)
            

            int_df.to_csv(f"microkinetics/{folder_name}/{folder_name}-intermediates.csv", index=False)

            xforms = []
            for ts, xform in xform_dict.items():
                energy_l, energy_r = get_left_right_barrier_reference_energies(int_df, xform)
                if ts in diffusion_ts_list:
                    ts_energy = max(energy_l, energy_r)
                else:
                    ts_energy = get_energy(current_df, ts)

                xforms.append({'I1': xform[0], 'I2': xform[1], 'P1': xform[2], 'P2': xform[3], 
                                'Energy': ts_energy, 'Backwards': xform[4], 'Flag': ts, 
                                'Ref_L': (ts_energy-energy_l)*2625.5/4.184, 
                                'Ref_R': (ts_energy-energy_r)*2625.5/4.184,}
                                )
            
            xform_df = pd.DataFrame(xforms, columns=['I1', 'I2', 'P1', 'P2', 'Energy', 'Backwards', 'Flag', 'Ref_L', 'Ref_R'])
            xform_df["Backwards"] = xform_df["Backwards"].astype(bool)    

            xform_df.to_csv(f"microkinetics/{folder_name}/{folder_name}-transformations.csv", index=False)    
            
for type_name in ['ester']:
    for functionalization in functionalizations:
        int_list, ts_list, rxt_pdt_list, diffusion_ts_list, xform_dict = get_main_microkinetics_transformations(type_name=type_name, functionalization=functionalization)

        for ligand in structure_dict['functionalized']:
            folder_name = f"{type_name}-{functionalization}{ligand}"
            os.makedirs(f"microkinetics/{folder_name}", exist_ok=True)
            print(f"{folder_name}")
            current_df = functionalized_ester_df[(functionalized_ester_df['group'] == ligand) & (functionalized_ester_df['functionalization'] == functionalization)][['structure', 'qh-G(T)_SPC']].copy()
            current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'e100-l2pd2oh4', 'qh-G(T)_SPC': current_df[current_df['structure'] == 'e00-lpdoh2']['qh-G(T)_SPC'].values[0]}])], 
                                ignore_index=True)
            current_df.loc[current_df['structure'] == 'e00-lpdoh2', 'qh-G(T)_SPC'] /= 2

            int_df = current_df[(~current_df['structure'].str.contains('ts')) & (current_df['structure'].isin(int_list))].copy()
            
            for rxt in rxt_pdt_list:
                if rxt == "ligand":
                    int_df = pd.concat([int_df, pd.DataFrame([{'structure': rxt, 'qh-G(T)_SPC': get_ligand_energy(ligands_df, ligand)}])], ignore_index=True)
                elif rxt == "pre-catalyst-dimer":
                    int_df = pd.concat([int_df, pd.DataFrame([{'structure': rxt, 'qh-G(T)_SPC': get_precatalyst_energy(current_df, ligand)}])], ignore_index=True)
                elif rxt == "pre-catalyst-monomer":
                    int_df = pd.concat([int_df, pd.DataFrame([{'structure': rxt, 'qh-G(T)_SPC': get_precatalyst_energy(current_df, ligand)/2}])], ignore_index=True)
                else:
                    int_df = pd.concat([int_df, pd.DataFrame([{'structure': rxt, 'qh-G(T)_SPC': get_reactant_energy(rxt, functionalization=functionalization)}])], ignore_index=True)
            

            int_df.to_csv(f"microkinetics/{folder_name}/{folder_name}-intermediates.csv", index=False)

            xforms = []
            for ts, xform in xform_dict.items():
                energy_l, energy_r = get_left_right_barrier_reference_energies(int_df, xform)
                if ts in diffusion_ts_list:
                    ts_energy = max(energy_l, energy_r)
                else:
                    ts_energy = get_energy(current_df, ts)

                xforms.append({'I1': xform[0], 'I2': xform[1], 'P1': xform[2], 'P2': xform[3], 
                                'Energy': ts_energy, 'Backwards': xform[4], 'Flag': ts, 
                                'Ref_L': (ts_energy-energy_l)*2625.5/4.184, 
                                'Ref_R': (ts_energy-energy_r)*2625.5/4.184,}
                                )
            
            xform_df = pd.DataFrame(xforms, columns=['I1', 'I2', 'P1', 'P2', 'Energy', 'Backwards', 'Flag', 'Ref_L', 'Ref_R'])
            xform_df["Backwards"] = xform_df["Backwards"].astype(bool)    

            xform_df.to_csv(f"microkinetics/{folder_name}/{folder_name}-transformations.csv", index=False)    
            


main-6ome3e-pcy3
main-6ome3h-ptbu3
main-6ome3m-cyjohnphos
main-6ome3t-tbujohnphos
main-6cn3e-pcy3
main-6cn3h-ptbu3
main-6cn3m-cyjohnphos
main-6cn3t-tbujohnphos
ester-6ome3e-pcy3
ester-6ome3h-ptbu3
ester-6ome3m-cyjohnphos
ester-6ome3t-tbujohnphos
ester-6cn3e-pcy3
ester-6cn3h-ptbu3
ester-6cn3m-cyjohnphos
ester-6cn3t-tbujohnphos


## Plotting reaction profile diagrams

In [12]:
def interpolate(df, line, num=1000):
    sub_df = df[df['line'] == line]
    x_min = min(sub_df['point'])
    x_max = max(sub_df['point'])
    x_interp = np.linspace(x_min, x_max, num)
    # Turning point at energy minima/maxima
    spline_anchors = [(row['point'], [row['reference_gibbs'], 0]) for _ , row in sub_df.iterrows()]
    #print(spline_anchors)
    spline = BPoly.from_derivatives(xi=sub_df['point'], yi=[[row['reference_gibbs'], 0] for _ , row in sub_df.iterrows()])
    return x_interp, spline(x_interp)

In [13]:
type_list

['main', 'xa', 'xb', 'xaxb', 'ya', 'yb', 'yayb', 'p1', 'ob', 'hc']

In [14]:
base_diffusion_consideration_dict = {"C2 // H$_2$O": ["C2", "C2 + H$_2$O"],
                       "C3 // B(OH)$_3$": ["C3 + B(OH)$_3$", "C3"],
                       "C4-PDT": ['C4', 'PDT'],
                       "T2 // H$_2$O": ["T2", "T2 + H$_2$O"],
                       "T3 // B(OH)$_3$": ["T3 + B(OH)$_3$", "T3"],
                       'T4-PDT': ["T4", "PDT"],
                       "Pd-RXT": ["[LPd(OH)$_2$]$_2$", "RXT",],
                        }

pdt_energy = (get_reactant_energy('naph') + get_reactant_energy('boh3') - get_reactant_energy('naphboh2') - get_reactant_energy('h2o'))*2625.5/4.184

for type_name in type_list:

    diffusion_consideration_dict = dict(base_diffusion_consideration_dict)

    if type_name == 'p1':
        diffusion_consideration_dict.update({"T2 // L": ["T2", "T2 + L"],
                                                "T3 // L": ["T3", "T3 + L"],
                                                "P1 // B(OH)$_3$": ["P1 + B(OH)$_3$", "P1"],
                                                "P1 // H$_2$O": ["P1 + H$_2$O", "P1"],
                                                })
        
    if type_name == 'hc':
        diffusion_consideration_dict.update({"C2 // C2$_{dimer}$": ["C2", "C2$_{dimer}$"],
                                            "C2$_{dimer}$ // C2": ["C2$_{dimer}$", "C2"],
                                            "C2 // C2 + NaphB(OH)$_2$": ["C2", "C2 + NaphB(OH)$_2$"],
                                            "HC1 + B(OH)$_3$ // HC1": ["HC1 + B(OH)$_3$", "HC1"],
                                            "HC3 // HC4 + B(OH)$_3$": ["HC3", "HC4 + B(OH)$_3$"],
                                            "LPd(0)B(OH)$_3$ // LPd(0) + B(OH)$_3$": ["LPd(0)B(OH)$_3$", "LPd(0) + B(OH)$_3$"],
                                        })      

    if type_name == 'ob':
        diffusion_consideration_dict.update({"T2 + H$_2$O // T2 + 2H$_2$O": ["T2 + H$_2$O", "T2 + 2H$_2$O"],
                                             "T3OB + 2H$_2$O // T3OB + H$_2$O": ["T3OB + 2H$_2$O", "T3OB + H$_2$O"],
                                             "T3OB + H$_2$O // T3OB": ["T3OB + H$_2$O", "T3OB"],
                                             "T5OB + R-H // T5OB": ["T5OB + R-H", "T5OB"],
                                             "T5OB // T5OB + H$_2$O": ["T5OB", "T5OB + H$_2$O"],
                                             "T5OB + H$_2$O // T5OB + 2H$_2$O": ["T5OB + H$_2$O", "T5OB + 2H$_2$O"],
                                             "T6OB + H$_2$O // T6OB": ["T6OB + H$_2$O", "T6OB"],
                                             "T6OB // PDT": ["T6OB", "PDT"],
                                             "T4OB // T4OB + H$_2$O": ["T4OB", "T4OB + H$_2$O"],
                                             "T4OB + H$_2$O // T4OB + 2H$_2$O": ["T4OB + H$_2$O", "T4OB + 2H$_2$O"],
                                             "T4 + B(OH)$_3$ + H$_2$O // T4 + B(OH)$_3$": ["T4 + B(OH)$_3$ + H$_2$O", "T4 + B(OH)$_3$"],
                                             "T4 + B(OH)$_3$ // T4": ["T4 + B(OH)$_3$", "T4"],
                                             "T3 + B(OH)$_3$ + H$_2$O // T3 + B(OH)$_3$": ["T3 + B(OH)$_3$ + H$_2$O", "T3 + B(OH)$_3$"],
                                             })
    for ligand in structure_dict[type_name]:
        print(f"{type_name}-{ligand}")
        os.makedirs(f"microkinetics/{type_name}-{ligand}", exist_ok=True)
        current_df = main_df[main_df['group'] == ligand][['structure', 'reference_gibbs']].copy()
        current_df['label_name'] = current_df.apply(lambda row: nice_structure_names[row['structure']], axis=1)
        current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'pdt', 'reference_gibbs': pdt_energy, 'label_name': 'PDT'}])], ignore_index=True)
        for diffusion, consideration in diffusion_consideration_dict.items():
            energy = max(get_reference_gibbs_energy(current_df, consideration[0]), get_reference_gibbs_energy(current_df, consideration[1]))+diff_barrier
            current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'diffusion', 'reference_gibbs': energy, 'label_name': diffusion}])], ignore_index=True)  
        try:
            template = pd.read_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot.csv")
            template = pd.merge(template, current_df[['label_name', 'reference_gibbs']], on='label_name', how='left', suffixes=('_old', ""))
            template = template.drop('reference_gibbs_old', axis=1)
            template.sort_values(by=['line', 'point'], ascending=[True, True], inplace=True)
            template.to_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot.csv", index=False)
        except FileNotFoundError:
            template = pd.read_csv(f"templates/rpd_template_{type_name}.csv")
            template = pd.merge(template, current_df, on='label_name')
            template.to_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot.csv", index=False)

        fig, ax = plt.subplots(figsize=(6,3), dpi=300)
        for line in set(template['line'].values):
            if line > 20:
                continue
            else:
                curr_line_df = template[template['line'] == line].copy().sort_values(by='point').reset_index(drop=True)
                x_solid, y_solid = interpolate(curr_line_df, line=line)

                for idx, point in curr_line_df.iterrows():
                    if point['is_point'] == True:
                        ax.scatter(point['point'], point['reference_gibbs'], color=point['point_color'], marker=point['point_marker'], s=10)
                    if point['is_TS'] == True:
                        # Locate triangle marker index
                        idx_0 = np.where(x_solid >= curr_line_df.at[idx-1, 'point'])[0][0]
                        idx_1 = np.where(x_solid >= point['point'])[0][0]
                        idx_2 = np.where(x_solid >= curr_line_df.at[idx+1, 'point'])[0][0]
                        ax.plot(x_solid[idx_0:idx_2], y_solid[idx_0:idx_2], color=curr_line_df['line_color'].values[0], 
                                zorder=-1, linewidth=1, linestyle='--' if point['point_marker'] == 'x' or point['point_marker'] == 'v' else '-')
                    
                    if point['is_label'] == True:
                        label_text = f"{point['label_name']}" f"\n{point['reference_gibbs']:.1f}"

                        ax.annotate(label_text, xy=(point['point'], point['reference_gibbs']), 
                                    ha='center', color=point['point_color'], va=point['vertical_alignment'], 
                                    fontsize=5, xytext=(point['point'], point['reference_gibbs']+point['vertical_offset']))
                    
                
                        
        ax.spines[['right', 'top', 'bottom']].set_visible(False)           
        ax.set_yticklabels([])
        ax.set_xticklabels([])
        plt.tick_params(left = False,bottom=False) 
        ax.set_xlim(left=-1.5)    
        ax.plot(-1.5, 1, "^k", transform=ax.get_xaxis_transform(), clip_on=False, markersize=2)

        ax.set_ylabel("$\Delta G$ / $kcal$ $mol^{-1}$", size=8)
        if f"{type_name}-{ligand}" == "p1-3t-tbujohnphos":
            ax.set_ylim(-30, 45)
        elif f"{type_name}-{ligand}" == "yayb-3w-tbuxphos" or f"{type_name}-{ligand}" == "yayb-3aa-adbrettphos":
            ax.set_ylim(-30, 40)
        elif type_name == 'ob' or type_name == 'xaxb':
            ax.set_ylim(-30, 30)
        elif type_name == 'hc':
            ax.set_ylim(-65, 30)
        else:
            ax.set_ylim(-45, 30)
        plt.title(f"L = {proper_names_dict[ligand]}", size=8)
        plt.savefig(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot.png", facecolor='white', transparent=False, bbox_inches='tight')
        plt.close()

main-3z-adjohnphos
main-3aa-adbrettphos
main-3s-brettphos
main-3m-cyjohnphos
main-3n-cymephos
main-3o-davephos
main-3j-iprjohnphos
main-3y-me4tbuxphos
main-3i-pad3
main-3d-pcpt3
main-3f-pcy2tbu
main-3e-pcy3
main-3k-phjohnphos
main-3l-phdavephos
main-3c-pipr3
main-3b-potol3
main-3a-pph3
main-3g-ptbu2cy
main-3h-ptbu3
main-3q-ruphos
main-3p-sphos
main-3t-tbujohnphos
main-3x-tbubrettphos
main-3v-tbudavephos
main-3u-tbumephos
main-3w-tbuxphos
main-3r-xphos
main-9c-etjohnphos
main-9d-mejohnphos
main-9a-cbujohnphos
main-9b-cptjohnphos
main-9e-pcbu3
main-9f-pcpr3
main-9h-pme3
main-9g-pet3
xa-3w-tbuxphos
xa-3x-tbubrettphos
xa-3y-me4tbuxphos
xb-3w-tbuxphos
xb-3x-tbubrettphos
xb-3y-me4tbuxphos
xaxb-3w-tbuxphos
xaxb-3x-tbubrettphos
xaxb-3y-me4tbuxphos
ya-3s-brettphos
ya-3m-cyjohnphos
ya-3r-xphos
ya-3j-iprjohnphos
ya-3aa-adbrettphos
ya-3w-tbuxphos
yb-3s-brettphos
yb-3m-cyjohnphos
yb-3r-xphos
yb-3j-iprjohnphos
yb-3aa-adbrettphos
yb-3w-tbuxphos
yayb-3s-brettphos
yayb-3m-cyjohnphos
yayb-3r-xphos
yayb-

In [15]:
base_diffusion_consideration_dict = {"C2 // H$_2$O": ["C2", "C2 + H$_2$O"],
                       "C3 // B(OH)$_3$": ["C3 + B(OH)$_3$", "C3"],
                       "C4-PDT": ['C4', 'PDT'],
                       "T2 // H$_2$O": ["T2", "T2 + H$_2$O"],
                       "T3 // B(OH)$_3$": ["T3 + B(OH)$_3$", "T3"],
                       'T4-PDT': ["T4", "PDT"],
                       "Pd-RXT": ["[LPd(OH)$_2$]$_2$", "RXT",],
                        }

pdt_energy = (get_reactant_energy('naph') + get_reactant_energy('boh3') - get_reactant_energy('naphboh2') - get_reactant_energy('h2o'))*2625.5/4.184

for type_name in ["ob"]:

    diffusion_consideration_dict = dict(base_diffusion_consideration_dict)
    diffusion_consideration_dict.update({"T2 + H$_2$O // T2 + 2H$_2$O": ["T2 + H$_2$O", "T2 + 2H$_2$O"],
                                            "T3OB + 2H$_2$O // T3OB + H$_2$O": ["T3OB + 2H$_2$O", "T3OB + H$_2$O"],
                                            "T3OB + H$_2$O // T3OB": ["T3OB + H$_2$O", "T3OB"],
                                            "T5OB + R-H // T5OB": ["T5OB + R-H", "T5OB"],
                                            "T5OB // T5OB + H$_2$O": ["T5OB", "T5OB + H$_2$O"],
                                            "T5OB + H$_2$O // T5OB + 2H$_2$O": ["T5OB + H$_2$O", "T5OB + 2H$_2$O"],
                                            "T6OB + H$_2$O // T6OB": ["T6OB + H$_2$O", "T6OB"],
                                            "T6OB // PDT": ["T6OB", "PDT"],
                                            "T4OB // T4OB + H$_2$O": ["T4OB", "T4OB + H$_2$O"],
                                            "T4OB + H$_2$O // T4OB + 2H$_2$O": ["T4OB + H$_2$O", "T4OB + 2H$_2$O"],
                                            "T4 + B(OH)$_3$ + H$_2$O // T4 + B(OH)$_3$": ["T4 + B(OH)$_3$ + H$_2$O", "T4 + B(OH)$_3$"],
                                            "T4 + B(OH)$_3$ // T4": ["T4 + B(OH)$_3$", "T4"],
                                            })

    for ligand in structure_dict[type_name]:
        print(f"{type_name}-{ligand}")
        os.makedirs(f"microkinetics/{type_name}-{ligand}", exist_ok=True)
        current_df = main_df[main_df['group'] == ligand][['structure', 'reference_gibbs']].copy()
        current_df['label_name'] = current_df.apply(lambda row: nice_structure_names[row['structure']], axis=1)
        current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'pdt', 'reference_gibbs': pdt_energy, 'label_name': 'PDT'}])], ignore_index=True)
        for diffusion, consideration in diffusion_consideration_dict.items():
            energy = max(get_reference_gibbs_energy(current_df, consideration[0]), get_reference_gibbs_energy(current_df, consideration[1]))+diff_barrier
            current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'diffusion', 'reference_gibbs': energy, 'label_name': diffusion}])], ignore_index=True)  
        try:
            template = pd.read_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-zoom-plot.csv")
            template = pd.merge(template, current_df[['label_name', 'reference_gibbs']], on='label_name', how='left', suffixes=('_old', ""))
            template = template.drop('reference_gibbs_old', axis=1)
            template.sort_values(by=['line', 'point'], ascending=[True, True], inplace=True)
            template.to_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-zoom-plot.csv", index=False)
        except FileNotFoundError:
            template = pd.read_csv(f"templates/rpd_template_{type_name}-zoom.csv")
            template = pd.merge(template, current_df, on='label_name')
            template.to_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-zoom-plot.csv", index=False)

        fig, ax = plt.subplots(figsize=(6,3), dpi=300)
        for line in set(template['line'].values):
            curr_line_df = template[template['line'] == line].copy().sort_values(by='point').reset_index(drop=True)
            x_solid, y_solid = interpolate(curr_line_df, line=line)

            for idx, point in curr_line_df.iterrows():
                if point['is_point'] == True:
                    ax.scatter(point['point'], point['reference_gibbs'], color=point['point_color'], marker=point['point_marker'], s=10)
                if point['is_TS'] == True:
                    # Locate triangle marker index
                    idx_0 = np.where(x_solid >= curr_line_df.at[idx-1, 'point'])[0][0]
                    idx_1 = np.where(x_solid >= point['point'])[0][0]
                    idx_2 = np.where(x_solid >= curr_line_df.at[idx+1, 'point'])[0][0]
                    ax.plot(x_solid[idx_0:idx_2], y_solid[idx_0:idx_2], color=curr_line_df['line_color'].values[0], 
                            zorder=-1, linewidth=1, linestyle='--' if point['point_marker'] == 'x' or point['point_marker'] == 'v' else '-')
                
                if point['is_label'] == True:
                    label_text = f"{point['label_name']}" f"\n{point['reference_gibbs']:.1f}"

                    ax.annotate(label_text, xy=(point['point'], point['reference_gibbs']), 
                                ha='center', color=point['point_color'], va=point['vertical_alignment'], 
                                fontsize=5, xytext=(point['point'], point['reference_gibbs']+point['vertical_offset']))
                    
                
                        
        ax.spines[['right', 'top', 'bottom']].set_visible(False)           
        ax.set_yticklabels([])
        ax.set_xticklabels([])
        plt.tick_params(left = False,bottom=False) 
        ax.set_xlim(left=17.5)    
        ax.plot(17.5, 1, "^k", transform=ax.get_xaxis_transform(), clip_on=False, markersize=2)

        ax.set_ylabel("$\Delta G$ / $kcal$ $mol^{-1}$", size=8)
        ax.set_ylim(-30, 30)

        plt.title(f"L = {proper_names_dict[ligand]}", size=8)
        plt.savefig(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-zoom-plot.png", facecolor='white', transparent=False, bbox_inches='tight')
        plt.close()

ob-3w-tbuxphos
ob-3x-tbubrettphos
ob-3y-me4tbuxphos


In [16]:
ester_base_diffusion_consideration_dict = {"EC2 // H$_2$O": ["EC2", "EC2 + H$_2$O"],
                       "EC3 // BpinOH": ["EC3 + BpinOH", "EC3"],
                       "EC4-PDT": ['EC4', 'PDT'],
                       "ET2 // H$_2$O": ["ET2", "ET2 + H$_2$O"],
                       "ET3 // BpinOH": ["ET3 + BpinOH", "ET3"],
                       'ET4-PDT': ["ET4", "PDT"],
                       "Pd-ECRXT": ["[LPd(OH)$_2$]$_2$", "ECRXT",],
                       "Pd-ETRXT": ["[LPd(OH)$_2$]$_2$", "ETRXT",],
                       "EC2B // H$_2$O": ["EC2B", "EC2B + H$_2$O"],
                       "EC3B // BpinOH": ["EC3B + BpinOH", "EC3"],
                       "ET2B // H$_2$O": ["ET2B", "ET2B + H$_2$O"],
                       "ET3B // BpinOH": ["ET3B + BpinOH", "ET3"],
                        }

pdt_energy = (get_reactant_energy('naph') + get_reactant_energy('bpinoh') - get_reactant_energy('naphbpin') - get_reactant_energy('h2o'))*2625.5/4.184

for type_name in ['ester']:
    diffusion_consideration_dict = dict(ester_base_diffusion_consideration_dict)
    for ligand in ester_df['group'].unique():
        print(f"{type_name}-{ligand}")
        os.makedirs(f"microkinetics/{type_name}-{ligand}", exist_ok=True)
        current_df = ester_df[ester_df['group'] == ligand][['structure', 'reference_gibbs']].copy()
        current_df['label_name'] = current_df.apply(lambda row: nice_structure_names[row['structure']], axis=1)
        current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'pdt', 'reference_gibbs': pdt_energy, 'label_name': 'PDT'}])], ignore_index=True)
        for diffusion, consideration in diffusion_consideration_dict.items():
            try:
                energy = max(get_reference_gibbs_energy(current_df, consideration[0]), get_reference_gibbs_energy(current_df, consideration[1]))+diff_barrier
                current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'diffusion', 'reference_gibbs': energy, 'label_name': diffusion}])], ignore_index=True)  
            except IndexError:
                pass
        try:
            template = pd.read_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot.csv")
            template = pd.merge(template, current_df[['label_name', 'reference_gibbs']], on='label_name', how='left', suffixes=('_old', ""))
            template = template.drop('reference_gibbs_old', axis=1)
            template.sort_values(by=['line', 'point'], ascending=[True, True], inplace=True)
            template.to_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot.csv", index=False)
        except FileNotFoundError:
            template = pd.read_csv(f"templates/rpd_template_{type_name}.csv")
            template = pd.merge(template, current_df, on='label_name')
            template.to_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot.csv", index=False)

        fig, ax = plt.subplots(figsize=(6,3), dpi=300)
        for line in set(template['line'].values):
            if line > 20:
                continue
            else:
                curr_line_df = template[template['line'] == line].copy().sort_values(by='point').reset_index(drop=True)
                x_solid, y_solid = interpolate(curr_line_df, line=line)

                for idx, point in curr_line_df.iterrows():
                    if point['is_point'] == True:
                        ax.scatter(point['point'], point['reference_gibbs'], color=point['point_color'], marker=point['point_marker'], s=10)
                    if point['is_TS'] == True:
                        # Locate triangle marker index
                        idx_0 = np.where(x_solid >= curr_line_df.at[idx-1, 'point'])[0][0]
                        idx_1 = np.where(x_solid >= point['point'])[0][0]
                        idx_2 = np.where(x_solid >= curr_line_df.at[idx+1, 'point'])[0][0]
                        ax.plot(x_solid[idx_0:idx_2], y_solid[idx_0:idx_2], color=curr_line_df['line_color'].values[0], 
                                zorder=-1, linewidth=1, linestyle='--' if point['point_marker'] == 'x' or point['point_marker'] == 'v' else '-')
                    
                    if point['is_label'] == True:
                        label_text = f"{point['label_name']}" f"\n{point['reference_gibbs']:.1f}"

                        ax.annotate(label_text, xy=(point['point'], point['reference_gibbs']), 
                                    ha='center', color=point['point_color'], va=point['vertical_alignment'], 
                                    fontsize=5, xytext=(point['point'], point['reference_gibbs']+point['vertical_offset']))
                    
                
                        
        ax.spines[['right', 'top', 'bottom']].set_visible(False)           
        ax.set_yticklabels([])
        ax.set_xticklabels([])
        plt.tick_params(left = False,bottom=False) 
        ax.set_xlim(left=-1.5)    
        ax.plot(-1.5, 1, "^k", transform=ax.get_xaxis_transform(), clip_on=False, markersize=2)

        ax.set_ylabel("$\Delta G$ / $kcal$ $mol^{-1}$", size=8)
        ax.set_ylim(-45, 30)
        plt.title(f"L = {proper_names_dict[ligand]}", size=8)
        plt.savefig(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot.png", facecolor='white', transparent=False, bbox_inches='tight')
        plt.close()
        
        etrxt_ref = template[template['label_name'] == 'ETRXT']['reference_gibbs'].values[0]
        template['reference_gibbs'] -= etrxt_ref
        
        fig, ax = plt.subplots(figsize=(6,3), dpi=300)
        for line in set(template['line'].values):
            if line > 20:
                continue
            else:
                curr_line_df = template[template['line'] == line].copy().sort_values(by='point').reset_index(drop=True)
                x_solid, y_solid = interpolate(curr_line_df, line=line)

                for idx, point in curr_line_df.iterrows():
                    if point['is_point'] == True:
                        ax.scatter(point['point'], point['reference_gibbs'], color=point['point_color'], marker=point['point_marker'], s=10)
                    if point['is_TS'] == True:
                        # Locate triangle marker index
                        idx_0 = np.where(x_solid >= curr_line_df.at[idx-1, 'point'])[0][0]
                        idx_1 = np.where(x_solid >= point['point'])[0][0]
                        idx_2 = np.where(x_solid >= curr_line_df.at[idx+1, 'point'])[0][0]
                        ax.plot(x_solid[idx_0:idx_2], y_solid[idx_0:idx_2], color=curr_line_df['line_color'].values[0], 
                                zorder=-1, linewidth=1, linestyle='--' if point['point_marker'] == 'x' or point['point_marker'] == 'v' else '-')
                    
                    if point['is_label'] == True:
                        label_text = f"{point['label_name']}" f"\n{point['reference_gibbs']:.1f}"

                        ax.annotate(label_text, xy=(point['point'], point['reference_gibbs']), 
                                    ha='center', color=point['point_color'], va=point['vertical_alignment'], 
                                    fontsize=5, xytext=(point['point'], point['reference_gibbs']+point['vertical_offset']))
                    
                
                        
        ax.spines[['right', 'top', 'bottom']].set_visible(False)           
        ax.set_yticklabels([])
        ax.set_xticklabels([])
        plt.tick_params(left = False,bottom=False) 
        ax.set_xlim(left=-1.5)    
        ax.plot(-1.5, 1, "^k", transform=ax.get_xaxis_transform(), clip_on=False, markersize=2)

        ax.set_ylabel("$\Delta G$ / $kcal$ $mol^{-1}$", size=8)
        ax.set_ylim(-45, 30)
        plt.title(f"L = {proper_names_dict[ligand]}", size=8)
        plt.savefig(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot-rxtref.png", facecolor='white', transparent=False, bbox_inches='tight')
        plt.close()

ester-3e-pcy3
ester-3h-ptbu3
ester-3j-iprjohnphos
ester-3m-cyjohnphos
ester-3t-tbujohnphos


In [17]:
nice_functionalizations = {'6ome': '6-OMe', '6cn': '6-CN'}

base_diffusion_consideration_dict = {"C2 // H$_2$O": ["C2", "C2 + H$_2$O"],
                       "C3 // B(OH)$_3$": ["C3 + B(OH)$_3$", "C3"],
                       "C4-PDT": ['C4', 'PDT'],
                       "T2 // H$_2$O": ["T2", "T2 + H$_2$O"],
                       "T3 // B(OH)$_3$": ["T3 + B(OH)$_3$", "T3"],
                       'T4-PDT': ["T4", "PDT"],
                       "Pd-RXT": ["[LPd(OH)$_2$]$_2$", "RXT",],
                        }

for type_name in ['main']:
    diffusion_consideration_dict = dict(base_diffusion_consideration_dict)
    for functionalization in functionalizations:
        for ligand in structure_dict['functionalized']:
            folder_name = f"{type_name}-{functionalization}{ligand}"
            pdt_energy = (get_reactant_energy(f'{functionalization}naph') + get_reactant_energy(f'boh3') - get_reactant_energy(f'{functionalization}naphboh2') - get_reactant_energy('h2o'))*2625.5/4.184
            current_df = functionalized_acid_df[(functionalized_acid_df['group'] == ligand) & (functionalized_acid_df['functionalization'] == functionalization)][['structure', 'reference_gibbs']].copy()
            current_df['label_name'] = current_df.apply(lambda row: nice_structure_names[row['structure']], axis=1)
            current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'pdt', 'reference_gibbs': pdt_energy, 'label_name': 'PDT'}])], ignore_index=True)
            for diffusion, consideration in diffusion_consideration_dict.items():
                try:
                    energy = max(get_reference_gibbs_energy(current_df, consideration[0]), get_reference_gibbs_energy(current_df, consideration[1]))+diff_barrier
                    current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'diffusion', 'reference_gibbs': energy, 'label_name': diffusion}])], ignore_index=True)  
                except IndexError:
                    pass
            try:
                template = pd.read_csv(f"microkinetics/{folder_name}/{folder_name}-plot.csv")
                template = pd.merge(template, current_df[['label_name', 'reference_gibbs']], on='label_name', how='left', suffixes=('_old', ""))
                template = template.drop('reference_gibbs_old', axis=1)
                template.sort_values(by=['line', 'point'], ascending=[True, True], inplace=True)
                template.to_csv(f"microkinetics/{folder_name}/{folder_name}-plot.csv", index=False)
            except FileNotFoundError:
                template = pd.read_csv(f"templates/rpd_template_{type_name}.csv")
                template = pd.merge(template, current_df, on='label_name')
                template.to_csv(f"microkinetics/{folder_name}/{folder_name}-plot.csv", index=False)

            fig, ax = plt.subplots(figsize=(6,3), dpi=300)
            for line in set(template['line'].values):
                if line > 20:
                    continue
                else:
                    curr_line_df = template[template['line'] == line].copy().sort_values(by='point').reset_index(drop=True)
                    x_solid, y_solid = interpolate(curr_line_df, line=line)

                    for idx, point in curr_line_df.iterrows():
                        if point['is_point'] == True:
                            ax.scatter(point['point'], point['reference_gibbs'], color=point['point_color'], marker=point['point_marker'], s=10)
                        if point['is_TS'] == True:
                            # Locate triangle marker index
                            idx_0 = np.where(x_solid >= curr_line_df.at[idx-1, 'point'])[0][0]
                            idx_1 = np.where(x_solid >= point['point'])[0][0]
                            idx_2 = np.where(x_solid >= curr_line_df.at[idx+1, 'point'])[0][0]
                            ax.plot(x_solid[idx_0:idx_2], y_solid[idx_0:idx_2], color=curr_line_df['line_color'].values[0], 
                                    zorder=-1, linewidth=1, linestyle='--' if point['point_marker'] == 'x' or point['point_marker'] == 'v' else '-')
                        
                        if point['is_label'] == True:
                            label_text = f"{point['label_name']}" f"\n{point['reference_gibbs']:.1f}"

                            ax.annotate(label_text, xy=(point['point'], point['reference_gibbs']), 
                                        ha='center', color=point['point_color'], va=point['vertical_alignment'], 
                                        fontsize=5, xytext=(point['point'], point['reference_gibbs']+point['vertical_offset']))
                        
                    
                            
            ax.spines[['right', 'top', 'bottom']].set_visible(False)           
            ax.set_yticklabels([])
            ax.set_xticklabels([])
            plt.tick_params(left = False,bottom=False) 
            ax.set_xlim(left=-1.5)    
            ax.plot(-1.5, 1, "^k", transform=ax.get_xaxis_transform(), clip_on=False, markersize=2)

            ax.set_ylabel("$\Delta G$ / $kcal$ $mol^{-1}$", size=8)
            ax.set_ylim(-45, 30)
            plt.title(f"L = {proper_names_dict[ligand]}, {nice_functionalizations[functionalization]}-NaphB(OH)$_2$", size=8)
            plt.savefig(f"microkinetics/{folder_name}/{folder_name}-plot.png", facecolor='white', transparent=False, bbox_inches='tight')
            plt.close()
            
            # find where label name is RXT, pull its reference_gibbs value
            rxt_reference = template[template['label_name'] == 'RXT']['reference_gibbs'].values[0]
            
            template['reference_gibbs'] = template['reference_gibbs'] - rxt_reference

            fig, ax = plt.subplots(figsize=(6,3), dpi=300)
            for line in set(template['line'].values):
                if line > 20:
                    continue
                else:
                    curr_line_df = template[template['line'] == line].copy().sort_values(by='point').reset_index(drop=True)
                    x_solid, y_solid = interpolate(curr_line_df, line=line)

                    for idx, point in curr_line_df.iterrows():
                        if point['is_point'] == True:
                            ax.scatter(point['point'], point['reference_gibbs'], color=point['point_color'], marker=point['point_marker'], s=10)
                        if point['is_TS'] == True:
                            # Locate triangle marker index
                            idx_0 = np.where(x_solid >= curr_line_df.at[idx-1, 'point'])[0][0]
                            idx_1 = np.where(x_solid >= point['point'])[0][0]
                            idx_2 = np.where(x_solid >= curr_line_df.at[idx+1, 'point'])[0][0]
                            ax.plot(x_solid[idx_0:idx_2], y_solid[idx_0:idx_2], color=curr_line_df['line_color'].values[0], 
                                    zorder=-1, linewidth=1, linestyle='--' if point['point_marker'] == 'x' or point['point_marker'] == 'v' else '-')
                        
                        if point['is_label'] == True:
                            label_text = f"{point['label_name']}" f"\n{point['reference_gibbs']:.1f}"

                            ax.annotate(label_text, xy=(point['point'], point['reference_gibbs']), 
                                        ha='center', color=point['point_color'], va=point['vertical_alignment'], 
                                        fontsize=5, xytext=(point['point'], point['reference_gibbs']+point['vertical_offset']))
                        
                    
                            
            ax.spines[['right', 'top', 'bottom']].set_visible(False)           
            ax.set_yticklabels([])
            ax.set_xticklabels([])
            plt.tick_params(left = False,bottom=False) 
            ax.set_xlim(left=-1.5)    
            ax.plot(-1.5, 1, "^k", transform=ax.get_xaxis_transform(), clip_on=False, markersize=2)

            ax.set_ylabel("$\Delta G$ / $kcal$ $mol^{-1}$", size=8)
            ax.set_ylim(-45, 30)
            plt.title(f"L = {proper_names_dict[ligand]}, {nice_functionalizations[functionalization]}-NaphB(OH)$_2$", size=8)
            plt.savefig(f"microkinetics/{folder_name}/{folder_name}-plot-rxtref.png", facecolor='white', transparent=False, bbox_inches='tight')
            plt.close()            

In [18]:
ester_base_diffusion_consideration_dict = {"EC2 // H$_2$O": ["EC2", "EC2 + H$_2$O"],
                       "EC3 // BpinOH": ["EC3 + BpinOH", "EC3"],
                       "EC4-PDT": ['EC4', 'PDT'],
                       "ET2 // H$_2$O": ["ET2", "ET2 + H$_2$O"],
                       "ET3 // BpinOH": ["ET3 + BpinOH", "ET3"],
                       'ET4-PDT': ["ET4", "PDT"],
                       "Pd-ECRXT": ["[LPd(OH)$_2$]$_2$", "ECRXT",],
                       "Pd-ETRXT": ["[LPd(OH)$_2$]$_2$", "ETRXT",],
                       "EC2B // H$_2$O": ["EC2B", "EC2B + H$_2$O"],
                       "EC3B // BpinOH": ["EC3B + BpinOH", "EC3"],
                       "ET2B // H$_2$O": ["ET2B", "ET2B + H$_2$O"],
                       "ET3B // BpinOH": ["ET3B + BpinOH", "ET3"],
                        }

for type_name in ['ester']:
    diffusion_consideration_dict = dict(ester_base_diffusion_consideration_dict)
    for functionalization in functionalizations:
        for ligand in structure_dict['functionalized']:
            folder_name = f"{type_name}-{functionalization}{ligand}"
            pdt_energy = (get_reactant_energy(f'{functionalization}naph') + get_reactant_energy(f'bpinoh') - get_reactant_energy(f'{functionalization}naphbpin') - get_reactant_energy('h2o'))*2625.5/4.184
            current_df = functionalized_ester_df[(functionalized_ester_df['group'] == ligand) & (functionalized_ester_df['functionalization'] == functionalization)][['structure', 'reference_gibbs']].copy()
            current_df['label_name'] = current_df.apply(lambda row: nice_structure_names[row['structure']], axis=1)
            current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'pdt', 'reference_gibbs': pdt_energy, 'label_name': 'PDT'}])], ignore_index=True)
            for diffusion, consideration in diffusion_consideration_dict.items():
                try:
                    energy = max(get_reference_gibbs_energy(current_df, consideration[0]), get_reference_gibbs_energy(current_df, consideration[1]))+diff_barrier
                    current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'diffusion', 'reference_gibbs': energy, 'label_name': diffusion}])], ignore_index=True)  
                except IndexError:
                    pass
            try:
                template = pd.read_csv(f"microkinetics/{folder_name}/{folder_name}-plot.csv")
                template = pd.merge(template, current_df[['label_name', 'reference_gibbs']], on='label_name', how='left', suffixes=('_old', ""))
                template = template.drop('reference_gibbs_old', axis=1)
                template.sort_values(by=['line', 'point'], ascending=[True, True], inplace=True)
                template.to_csv(f"microkinetics/{folder_name}/{folder_name}-plot.csv", index=False)
            except FileNotFoundError:
                template = pd.read_csv(f"templates/rpd_template_{type_name}.csv")
                template = pd.merge(template, current_df, on='label_name')
                template.to_csv(f"microkinetics/{folder_name}/{folder_name}-plot.csv", index=False)

            fig, ax = plt.subplots(figsize=(6,3), dpi=300)
            for line in set(template['line'].values):
                if line > 20:
                    continue
                else:
                    curr_line_df = template[template['line'] == line].copy().sort_values(by='point').reset_index(drop=True)
                    x_solid, y_solid = interpolate(curr_line_df, line=line)

                    for idx, point in curr_line_df.iterrows():
                        if point['is_point'] == True:
                            ax.scatter(point['point'], point['reference_gibbs'], color=point['point_color'], marker=point['point_marker'], s=10)
                        if point['is_TS'] == True:
                            # Locate triangle marker index
                            idx_0 = np.where(x_solid >= curr_line_df.at[idx-1, 'point'])[0][0]
                            idx_1 = np.where(x_solid >= point['point'])[0][0]
                            idx_2 = np.where(x_solid >= curr_line_df.at[idx+1, 'point'])[0][0]
                            ax.plot(x_solid[idx_0:idx_2], y_solid[idx_0:idx_2], color=curr_line_df['line_color'].values[0], 
                                    zorder=-1, linewidth=1, linestyle='--' if point['point_marker'] == 'x' or point['point_marker'] == 'v' else '-')
                        
                        if point['is_label'] == True:
                            label_text = f"{point['label_name']}" f"\n{point['reference_gibbs']:.1f}"

                            ax.annotate(label_text, xy=(point['point'], point['reference_gibbs']), 
                                        ha='center', color=point['point_color'], va=point['vertical_alignment'], 
                                        fontsize=5, xytext=(point['point'], point['reference_gibbs']+point['vertical_offset']))
                        
                    
                            
            ax.spines[['right', 'top', 'bottom']].set_visible(False)           
            ax.set_yticklabels([])
            ax.set_xticklabels([])
            plt.tick_params(left = False,bottom=False) 
            ax.set_xlim(left=-1.5)    
            ax.plot(-1.5, 1, "^k", transform=ax.get_xaxis_transform(), clip_on=False, markersize=2)

            ax.set_ylabel("$\Delta G$ / $kcal$ $mol^{-1}$", size=8)
            ax.set_ylim(-45, 30)
            plt.title(f"L = {proper_names_dict[ligand]}, {nice_functionalizations[functionalization]}-NaphBpin", size=8)
            plt.savefig(f"microkinetics/{folder_name}/{folder_name}-plot.png", facecolor='white', transparent=False, bbox_inches='tight')
            plt.close()
            
            etrxt_ref = template[template['label_name'] == 'ETRXT']['reference_gibbs'].values[0]
            template['reference_gibbs'] = template['reference_gibbs'] - etrxt_ref
            
            fig, ax = plt.subplots(figsize=(6,3), dpi=300)
            for line in set(template['line'].values):
                if line > 20:
                    continue
                else:
                    curr_line_df = template[template['line'] == line].copy().sort_values(by='point').reset_index(drop=True)
                    x_solid, y_solid = interpolate(curr_line_df, line=line)

                    for idx, point in curr_line_df.iterrows():
                        if point['is_point'] == True:
                            ax.scatter(point['point'], point['reference_gibbs'], color=point['point_color'], marker=point['point_marker'], s=10)
                        if point['is_TS'] == True:
                            # Locate triangle marker index
                            idx_0 = np.where(x_solid >= curr_line_df.at[idx-1, 'point'])[0][0]
                            idx_1 = np.where(x_solid >= point['point'])[0][0]
                            idx_2 = np.where(x_solid >= curr_line_df.at[idx+1, 'point'])[0][0]
                            ax.plot(x_solid[idx_0:idx_2], y_solid[idx_0:idx_2], color=curr_line_df['line_color'].values[0], 
                                    zorder=-1, linewidth=1, linestyle='--' if point['point_marker'] == 'x' or point['point_marker'] == 'v' else '-')
                        
                        if point['is_label'] == True:
                            label_text = f"{point['label_name']}" f"\n{point['reference_gibbs']:.1f}"

                            ax.annotate(label_text, xy=(point['point'], point['reference_gibbs']), 
                                        ha='center', color=point['point_color'], va=point['vertical_alignment'], 
                                        fontsize=5, xytext=(point['point'], point['reference_gibbs']+point['vertical_offset']))
                        
                    
                            
            ax.spines[['right', 'top', 'bottom']].set_visible(False)           
            ax.set_yticklabels([])
            ax.set_xticklabels([])
            plt.tick_params(left = False,bottom=False) 
            ax.set_xlim(left=-1.5)    
            ax.plot(-1.5, 1, "^k", transform=ax.get_xaxis_transform(), clip_on=False, markersize=2)

            ax.set_ylabel("$\Delta G$ / $kcal$ $mol^{-1}$", size=8)
            ax.set_ylim(-45, 30)
            plt.title(f"L = {proper_names_dict[ligand]}, {nice_functionalizations[functionalization]}-NaphBpin", size=8)
            plt.savefig(f"microkinetics/{folder_name}/{folder_name}-plot-rxtref.png", facecolor='white', transparent=False, bbox_inches='tight')
            plt.close()

In [19]:
base_diffusion_consideration_dict = {"C2 // H$_2$O": ["C2", "C2 + H$_2$O"],
                       "C3 // B(OH)$_3$": ["C3 + B(OH)$_3$", "C3"],
                       "C4-PDT": ['C4', 'PDT'],
                       "T2 // H$_2$O": ["T2", "T2 + H$_2$O"],
                       "T3 // B(OH)$_3$": ["T3 + B(OH)$_3$", "T3"],
                       'T4-PDT': ["T4", "PDT"],
                       "Pd-RXT": ["[LPd(OH)$_2$]$_2$", "RXT",],
                        }

pdt_energy = (get_reactant_energy('naph') + get_reactant_energy('boh3') - get_reactant_energy('naphboh2') - get_reactant_energy('h2o'))*2625.5/4.184

for type_name in ['main']:

    diffusion_consideration_dict = dict(base_diffusion_consideration_dict)

    for ligand in structure_dict['functionalized']:
        print(f"{type_name}-{ligand}")
        os.makedirs(f"microkinetics/{type_name}-{ligand}", exist_ok=True)
        current_df = main_df[main_df['group'] == ligand][['structure', 'reference_gibbs']].copy()
        current_df['label_name'] = current_df.apply(lambda row: nice_structure_names[row['structure']], axis=1)
        current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'pdt', 'reference_gibbs': pdt_energy, 'label_name': 'PDT'}])], ignore_index=True)
        for diffusion, consideration in diffusion_consideration_dict.items():
            energy = max(get_reference_gibbs_energy(current_df, consideration[0]), get_reference_gibbs_energy(current_df, consideration[1]))+diff_barrier
            current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'diffusion', 'reference_gibbs': energy, 'label_name': diffusion}])], ignore_index=True)  
        try:
            template = pd.read_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot.csv")
            template = pd.merge(template, current_df[['label_name', 'reference_gibbs']], on='label_name', how='left', suffixes=('_old', ""))
            template = template.drop('reference_gibbs_old', axis=1)
            template.sort_values(by=['line', 'point'], ascending=[True, True], inplace=True)
            template.to_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot.csv", index=False)
        except FileNotFoundError:
            template = pd.read_csv(f"templates/rpd_template_{type_name}.csv")
            template = pd.merge(template, current_df, on='label_name')
            template.to_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot.csv", index=False)
            
        rxt_reference = template[template['label_name'] == 'RXT']['reference_gibbs'].values[0]
        template['reference_gibbs'] = template['reference_gibbs'] - rxt_reference

        fig, ax = plt.subplots(figsize=(6,3), dpi=300)
        for line in set(template['line'].values):
            if line > 20:
                continue
            else:
                curr_line_df = template[template['line'] == line].copy().sort_values(by='point').reset_index(drop=True)
                x_solid, y_solid = interpolate(curr_line_df, line=line)

                for idx, point in curr_line_df.iterrows():
                    if point['is_point'] == True:
                        ax.scatter(point['point'], point['reference_gibbs'], color=point['point_color'], marker=point['point_marker'], s=10)
                    if point['is_TS'] == True:
                        # Locate triangle marker index
                        idx_0 = np.where(x_solid >= curr_line_df.at[idx-1, 'point'])[0][0]
                        idx_1 = np.where(x_solid >= point['point'])[0][0]
                        idx_2 = np.where(x_solid >= curr_line_df.at[idx+1, 'point'])[0][0]
                        ax.plot(x_solid[idx_0:idx_2], y_solid[idx_0:idx_2], color=curr_line_df['line_color'].values[0], 
                                zorder=-1, linewidth=1, linestyle='--' if point['point_marker'] == 'x' or point['point_marker'] == 'v' else '-')
                    
                    if point['is_label'] == True:
                        label_text = f"{point['label_name']}" f"\n{point['reference_gibbs']:.1f}"

                        ax.annotate(label_text, xy=(point['point'], point['reference_gibbs']), 
                                    ha='center', color=point['point_color'], va=point['vertical_alignment'], 
                                    fontsize=5, xytext=(point['point'], point['reference_gibbs']+point['vertical_offset']))
                    
                
                        
        ax.spines[['right', 'top', 'bottom']].set_visible(False)           
        ax.set_yticklabels([])
        ax.set_xticklabels([])
        plt.tick_params(left = False,bottom=False) 
        ax.set_xlim(left=-1.5)    
        ax.plot(-1.5, 1, "^k", transform=ax.get_xaxis_transform(), clip_on=False, markersize=2)

        ax.set_ylabel("$\Delta G$ / $kcal$ $mol^{-1}$", size=8)
        if f"{type_name}-{ligand}" == "p1-3t-tbujohnphos":
            ax.set_ylim(-30, 45)
        elif f"{type_name}-{ligand}" == "yayb-3w-tbuxphos" or f"{type_name}-{ligand}" == "yayb-3aa-adbrettphos":
            ax.set_ylim(-30, 40)
        elif type_name == 'ob' or type_name == 'xaxb':
            ax.set_ylim(-30, 30)
        elif type_name == 'hc':
            ax.set_ylim(-65, 30)
        else:
            ax.set_ylim(-45, 30)
        plt.title(f"L = {proper_names_dict[ligand]}", size=8)
        plt.savefig(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot-rxtref.png", facecolor='white', transparent=False, bbox_inches='tight')
        plt.close()

main-3e-pcy3
main-3h-ptbu3
main-3m-cyjohnphos
main-3t-tbujohnphos


## Barrier Tuning to Parity Prediction Example Plots

In [20]:
for type_name in ['main']:
    for ligand in ['3e-pcy3', '3m-cyjohnphos']:
        diffusion_consideration_dict = dict(base_diffusion_consideration_dict)
        print(f"{type_name}-{ligand}")
        current_df = main_df[main_df['group'] == ligand][['structure', 'reference_gibbs']].copy()
        current_df['label_name'] = current_df.apply(lambda row: nice_structure_names[row['structure']], axis=1)
        current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'pdt', 'reference_gibbs': pdt_energy, 'label_name': 'PDT'}])], ignore_index=True)
        for diffusion, consideration in diffusion_consideration_dict.items():
            energy = max(get_reference_gibbs_energy(current_df, consideration[0]), get_reference_gibbs_energy(current_df, consideration[1]))+diff_barrier
            current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'diffusion', 'reference_gibbs': energy, 'label_name': diffusion}])], ignore_index=True)
            
        template = pd.read_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot-btpp.csv")

        fig, ax = plt.subplots(figsize=(6,3), dpi=300)
        for line in set(template['line'].values):
            curr_line_df = template[template['line'] == line].copy().sort_values(by='point').reset_index(drop=True)
            x_solid, y_solid = interpolate(curr_line_df, line=line)

            for idx, point in curr_line_df.iterrows():
                if point['is_point'] == True:
                    ax.scatter(point['point'], point['reference_gibbs'], color=point['point_color'], marker=point['point_marker'], s=10)
                if point['is_TS'] == True:
                    # Locate triangle marker index
                    idx_0 = np.where(x_solid >= curr_line_df.at[idx-1, 'point'])[0][0]
                    idx_1 = np.where(x_solid >= point['point'])[0][0]
                    idx_2 = np.where(x_solid >= curr_line_df.at[idx+1, 'point'])[0][0]
                    ax.plot(x_solid[idx_0:idx_2], y_solid[idx_0:idx_2], color=curr_line_df['line_color'].values[0], 
                            zorder=-1, linewidth=1, linestyle='--' if point['point_marker'] == 'x' else '-')
                
                if point['is_label'] == True:
                    label_text = f"{point['label_name']}" f"\n{point['reference_gibbs']:.1f}"

                    ax.annotate(label_text, xy=(point['point'], point['reference_gibbs']), 
                                ha='center', color=point['point_color'], va=point['vertical_alignment'], 
                                fontsize=5, xytext=(point['point'], point['reference_gibbs']+point['vertical_offset']))
                
            
                    
        ax.spines[['right', 'top', 'bottom']].set_visible(False)           
        ax.set_yticklabels([])
        ax.set_xticklabels([])
        plt.tick_params(left = False,bottom=False) 
        ax.set_xlim(left=-1.5)
        ax.plot(-1.5, 1, "^k", transform=ax.get_xaxis_transform(), clip_on=False, markersize=2)
        ax.set_ylabel("$\Delta G$ / $kcal$ $mol^{-1}$", size=8)
        ax.set_ylim(-30, 30)
        plt.title(f"L = {proper_names_dict[ligand]}", size=8)
        plt.savefig(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot-btpp.png", facecolor='white', transparent=False, bbox_inches='tight')
        plt.close()

main-3e-pcy3
main-3m-cyjohnphos


## Simplified Model Plot

In [21]:
template = pd.read_csv("templates/rpd_template_simplified.csv")

fig, ax = plt.subplots(figsize=(6,3), dpi=300)
for line in set(template['line'].values):
    curr_line_df = template[template['line'] == line].copy().sort_values(by='point').reset_index(drop=True)
    x_solid, y_solid = interpolate(curr_line_df, line=line)

    for idx, point in curr_line_df.iterrows():
        if point['is_point'] == True:
            ax.scatter(point['point'], point['reference_gibbs'], color=point['point_color'], marker=point['point_marker'], s=10)
        if point['is_TS'] == True:
            # Locate triangle marker index
            idx_0 = np.where(x_solid >= curr_line_df.at[idx-1, 'point'])[0][0]
            idx_1 = np.where(x_solid >= point['point'])[0][0]
            idx_2 = np.where(x_solid >= curr_line_df.at[idx+1, 'point'])[0][0]
            ax.plot(x_solid[idx_0:idx_2], y_solid[idx_0:idx_2], color=curr_line_df['line_color'].values[0], 
                    zorder=-1, linewidth=1, linestyle='--' if point['point_marker'] == 'x' else '-')
        
        if point['is_label'] == True:
            label_text = f"{point['label_name']}" f"\n{point['reference_gibbs']:.1f}"

            ax.annotate(label_text, xy=(point['point'], point['reference_gibbs']), 
                        ha='center', color=point['point_color'], va=point['vertical_alignment'], 
                        fontsize=5, xytext=(point['point'], point['reference_gibbs']+point['vertical_offset']))
        
    
            
ax.spines[['right', 'top', 'bottom']].set_visible(False)           
ax.set_yticklabels([])
ax.set_xticklabels([])
plt.tick_params(left = False,bottom=False) 
ax.set_xlim(left=-1)
ax.plot(-1, 1, "^k", transform=ax.get_xaxis_transform(), clip_on=False, markersize=2)
ax.set_ylabel("$\Delta G$ / $kcal$ $mol^{-1}$", size=8)
ax.set_ylim(-45, 30)
plt.title(f"Simplified Two-Barrier Model", size=8)
plt.savefig(f"figures/figure7.png", facecolor='white', transparent=False, bbox_inches='tight')
plt.close()

## Setup for Error Curve analysis

In [22]:
exp_yield_dict = {'3a-pph3': {'exp_yield': 0.008, 'exp_error': 0.0001},
                '3b-potol3': {'exp_yield': 0.647, 'exp_error': 0.0333},
                '3c-pipr3': {'exp_yield': 0.035, 'exp_error': 0.0131},
                '3d-pcpt3': {'exp_yield': 0.04, 'exp_error': 0.0091},
                '3e-pcy3': {'exp_yield': 0.036, 'exp_error': 0.028},
                '3f-pcy2tbu': {'exp_yield': 0.1, 'exp_error': 0.0255},
                '3g-ptbu2cy': {'exp_yield': 0.322, 'exp_error': 0.0127},
                '3h-ptbu3': {'exp_yield': 0.907, 'exp_error': 0.0186},
                '3i-pad3': {'exp_yield': 0.43, 'exp_error': 0.0466},
                '3j-iprjohnphos': {'exp_yield': 0.085, 'exp_error': 0.0431},
                '3k-phjohnphos': {'exp_yield': 0.186, 'exp_error': 0.0007},
                '3l-phdavephos': {'exp_yield': 0.089, 'exp_error': 0.0},
                '3m-cyjohnphos': {'exp_yield': 0.106, 'exp_error': 0.0057},
                '3n-cymephos': {'exp_yield': 0.154, 'exp_error': 0.0276},
                '3o-davephos': {'exp_yield': 0.209, 'exp_error': 0.0323},
                '3p-sphos': {'exp_yield': 0.15, 'exp_error': 0.0029},
                '3q-ruphos': {'exp_yield': 0.11, 'exp_error': 0.0175},
                '3r-xphos': {'exp_yield': 0.124, 'exp_error': 0.0278},
                '3s-brettphos': {'exp_yield': 0.063, 'exp_error': 0.0277},
                '3t-tbujohnphos': {'exp_yield': 0.7, 'exp_error': 0.0155},
                '3u-tbumephos': {'exp_yield': 0.448, 'exp_error': 0.0087},
                '3v-tbudavephos': {'exp_yield': 0.855, 'exp_error': 0.0277},
                '3w-tbuxphos': {'exp_yield': 0.829, 'exp_error': 0.0117},
                '3x-tbubrettphos': {'exp_yield': 0.66, 'exp_error': 0.0833},
                '3y-me4tbuxphos': {'exp_yield': 0.343, 'exp_error': 0.0884},
                '3z-adjohnphos': {'exp_yield': 0.257, 'exp_error': 0.0009},
                '3aa-adbrettphos': {'exp_yield': 0.252, 'exp_error': 0.0105},
                '9a-cbujohnphos': {'exp_yield': np.nan, 'exp_error': np.nan},
                '9b-cptjohnphos': {'exp_yield': np.nan, 'exp_error': np.nan},
                '9c-etjohnphos': {'exp_yield': np.nan, 'exp_error': np.nan},
                '9d-mejohnphos': {'exp_yield': np.nan, 'exp_error': np.nan},
                '9e-pcbu3': {'exp_yield': np.nan, 'exp_error': np.nan},
                '9f-pcpr3': {'exp_yield': np.nan, 'exp_error': np.nan},
                '9g-pet3': {'exp_yield': np.nan, 'exp_error': np.nan},
                '9h-pme3': {'exp_yield': np.nan, 'exp_error': np.nan},
                '6cn3e-pcy3': {'exp_yield': 0.058, 'exp_error': 0.009},
                '6cn3h-ptbu3': {'exp_yield': 0.901, 'exp_error': 0.014},
                '6cn3m-cyjohnphos': {'exp_yield': 0.188, 'exp_error': 0.003},
                '6cn3t-tbujohnphos': {'exp_yield': 0.758, 'exp_error': 0.002},
                '6ome3e-pcy3': {'exp_yield': 0.045, 'exp_error': 0.003},
                '6ome3h-ptbu3': {'exp_yield': 0.904, 'exp_error': 0.019},
                '6ome3m-cyjohnphos': {'exp_yield': 0.080, 'exp_error': 0.011},
                '6ome3t-tbujohnphos': {'exp_yield': 0.572, 'exp_error': 0.016},
}

In [23]:
# merge main_df, ester_df, functionalized_acid_df, functionalized_ester_df
functionalized_acid_for_merge_df = functionalized_acid_df.copy()
functionalized_acid_for_merge_df['group'] = functionalized_acid_for_merge_df['functionalization'] + functionalized_acid_for_merge_df['group']
functionalized_ester_for_merge_df = functionalized_ester_df.copy()
functionalized_ester_for_merge_df['group'] = functionalized_ester_for_merge_df['functionalization'] + functionalized_ester_for_merge_df['group']

merged_df = pd.concat([main_df, ester_df, functionalized_acid_for_merge_df, functionalized_ester_for_merge_df], ignore_index=True)


In [24]:
pivoted_df = merged_df.pivot(index='group', columns='structure', values='reference_gibbs')
pivoted_df = pivoted_df.reset_index()

pivoted_df['exp_yield'] = pivoted_df.apply(lambda row: exp_yield_dict[row['group']]['exp_yield'], axis=1)
pivoted_df['exp_error'] = pivoted_df.apply(lambda row: exp_yield_dict[row['group']]['exp_error'], axis=1)

pivoted_df['max_C_TM'] = pivoted_df[['02-ts-rxt-c1', '04-ts-c1-c2']].max(axis=1)
pivoted_df['max_C_TM_name'] = pivoted_df[['02-ts-rxt-c1', '04-ts-c1-c2']].idxmax(axis=1)
pivoted_df['max_T_uncorr'] = pivoted_df[['12-ts-rxt-t1', '14-ts-t1-t2', '17-ts-t2-t3', '20-ts-t3-t4']].max(axis=1)
pivoted_df['max_T_uncorr_name'] = pivoted_df[['12-ts-rxt-t1', '14-ts-t1-t2', '17-ts-t2-t3', '20-ts-t3-t4']].idxmax(axis=1)
pivoted_df['max_overall_uncorr_name'] = pivoted_df[['02-ts-rxt-c1', '04-ts-c1-c2', '12-ts-rxt-t1', '14-ts-t1-t2', '17-ts-t2-t3', '20-ts-t3-t4']].idxmax(axis=1)
pivoted_df['ddG_uncorr'] = pivoted_df['max_C_TM'] - pivoted_df['max_T_uncorr']

for h2o_eq in [1.0, 3.5, 5.0]:
    pivoted_df[f'17-ts-t2-t3-corr_{h2o_eq}'] = pivoted_df['17-ts-t2-t3'] + get_second_order_barrier_correction(h2o_eq*0.02, 333.15)
    pivoted_df[f'max_T_corr_{h2o_eq}'] = pivoted_df[['12-ts-rxt-t1', '14-ts-t1-t2', f'17-ts-t2-t3-corr_{h2o_eq}', '20-ts-t3-t4']].max(axis=1)
    pivoted_df[f'max_T_corr_{h2o_eq}_name'] = pivoted_df[['12-ts-rxt-t1', '14-ts-t1-t2', f'17-ts-t2-t3-corr_{h2o_eq}', '20-ts-t3-t4']].idxmax(axis=1)
    pivoted_df[f'max_overall_corr_{h2o_eq}_name'] = pivoted_df[['02-ts-rxt-c1', '04-ts-c1-c2', '12-ts-rxt-t1', '14-ts-t1-t2', f'17-ts-t2-t3-corr_{h2o_eq}', '20-ts-t3-t4']].idxmax(axis=1)
    pivoted_df[f'ddG_corr_{h2o_eq}'] = pivoted_df['max_C_TM'] - pivoted_df[f'max_T_corr_{h2o_eq}']


pivoted_df.to_csv("all_barriers.csv", index=False)


## Setup for Microkinetics

In [25]:
microkinetics_of_interest = []

for main in structure_dict['main']:
    microkinetics_of_interest.append(f"main-{main}")

for ob in structure_dict['ob']:
    microkinetics_of_interest.append(f"ob-{ob}")

for hc in structure_dict['hc']:
    microkinetics_of_interest.append(f"hc-{hc}")

for p1 in structure_dict['p1']:
    microkinetics_of_interest.append(f"p1-{p1}")

for pd3oac6 in structure_dict['pd3oac6']:
    microkinetics_of_interest.append(f"pd3oac6-{pd3oac6}")

microkinetics_of_interest.append("yayb-3s-brettphos")

for ester in ester_df['group'].unique():
    microkinetics_of_interest.append(f"ester-{ester}")
    
for functionalization in functionalizations:
    for main in structure_dict['functionalized']:
        microkinetics_of_interest.append(f"main-{functionalization}{main}")
        microkinetics_of_interest.append(f"ester-{functionalization}{main}")

base_mk_params = pd.DataFrame(microkinetics_of_interest, columns=["microkinetic_name"])
base_mk_params['microkinetic_type'] = base_mk_params.apply(lambda row: row['microkinetic_name'].split('-')[0], axis=1)
base_mk_params['ligand_name'] = base_mk_params.apply(lambda row: row['microkinetic_name'].split('-')[1] + '-' + row['microkinetic_name'].split('-')[2], axis=1)
base_mk_params['exp_yield'] = base_mk_params.apply(lambda row: exp_yield_dict[row['ligand_name']]['exp_yield'], axis=1)
base_mk_params['exp_error'] = base_mk_params.apply(lambda row: exp_yield_dict[row['ligand_name']]['exp_error'], axis=1)

pivoted_df['min_of_max_barrier'] = pivoted_df[['max_C_TM', 'max_T_corr_3.5']].min(axis=1)

base_mk_params = pd.merge(base_mk_params, pivoted_df[['group', 'min_of_max_barrier']], left_on='ligand_name', right_on='group')
base_mk_params.sort_values(by=['microkinetic_name', 'microkinetic_type'], ascending=[True, True], inplace=True)
base_mk_params = base_mk_params.drop(columns=['group']).reset_index(drop=True)

base_mk_params.to_csv("base_microkinetic_parameters.csv", index=False)


In [26]:
pivoted_df

structure,group,00-lpdoh2,01-rxt,02-ts-rxt-c1,03-c1,04-ts-c1-c2,05-c2,06-c2-h2o,07-ts-c2-c3,08-c3-boh3,09-c3,10-ts-c3-c4,11-c4,12-ts-rxt-t1,13-t1,14-ts-t1-t2,15-t2,16-t2-h2o,17-ts-t2-t3,18-t3-boh3,19-t3,20-ts-t3-t4,21-t4,22-c5,23-ts-c5-c6,24-t5,25-ts-t5-t6,26-t6,27-ts-t2-xa,28-xa,29-ts-xa-t3,30-ts-t2-xb,31-xb,32-ts-xb-t3,33-ts-rxt-ya,34-ya,35-ts-ya-c1,36-ts-rxt-yb,37-yb,38-ts-yb-c1,39-t2-lig,40-ts-t2-p1,41-p1-boh3,42-p1,43-t3-lig,44-ts-t3-p1,45-p1-h2o,58-c2alt,59-t2-2h2o,60-ts-t2-t3ob,61-t3ob-2h2o,62-t3ob-h2o,63-t3ob,64-ts-t3ob-t4ob,65-t4ob,66-ts-t2-t4ob,67-t4ob-h2o,68-ts-t4ob-t4-4mps,69-t4-boh3,70-t4ob-2h2o,71-ts-t4ob-t4-6mps,72-t4-boh3h2o,73-ts-t4ob-t5ob,74-t5ob-rh,75-t5ob,76-t5ob-1h2o,77-ts-t5ob-t6ob-4mps,78-t6ob,79-t5ob-2h2o,80-ts-t5ob-t6ob-6mps,81-t6ob-1h2o,82-ts-t2-t3-4mps,83-ts-t2-t3-6mps,84-t3-boh3h2o,85-c2dim,86-c2-naphboh2,87-ts-c2-naphboh2-hc1,88-hc1-boh3,89-hc1,90-ts-hc1-hc2,91-hc2,92-ts-hc2-hc3,93-hc3,94-ts-hc3-pd0boh3,95-lpd0boh3,96-hc4,97-ts-hc4-lpd0,98-lpd0,e00-lpdoh2,e01-ecrxt,e02-ts-ecrxt-ec1,e03-ec1,e04-ts-ec1-ec2,e05-ec2,e06-ec2-h2o,e07-ts-ec2-ec3,e08-ec3-bpinoh,e09-ec3,e10-ts-ec3-ec4,e11-ec4,e12-etrxt,e13-ts-etrxt-et1,e14-et1,e15-ts-et1-et2,e16-et2,e17-et2-h2o,e18-ts-et2-et3,e19-et3-bpinoh,e20-et3,e21-ts-et3-et4,e22-et4,e23-ts-etrxt-ec1b,e24-ec1b,e25-ts-ec1b-ec2b,e26-ec2b,e27-ec2b-h2o,e28-ts-ec2b-ec3b,e29-ec3b-bpinoh,e30-ts-ecrxt-et1b,e31-et1b,e32-ts-et1b-et2b,e33-et2b,e34-et2b-h2o,e35-ts-et2b-et3b,e36-et3b-bpinoh,exp_yield,exp_error,max_C_TM,max_C_TM_name,max_T_uncorr,max_T_uncorr_name,max_overall_uncorr_name,ddG_uncorr,17-ts-t2-t3-corr_1.0,max_T_corr_1.0,max_T_corr_1.0_name,max_overall_corr_1.0_name,ddG_corr_1.0,17-ts-t2-t3-corr_3.5,max_T_corr_3.5,max_T_corr_3.5_name,max_overall_corr_3.5_name,ddG_corr_3.5,17-ts-t2-t3-corr_5.0,max_T_corr_5.0,max_T_corr_5.0_name,max_overall_corr_5.0_name,ddG_corr_5.0,min_of_max_barrier
0,3a-pph3,0.0,2.765121,13.593426,2.073605,5.964165,-29.891732,-27.998535,-22.379815,-27.701723,-22.714905,7.940192,-7.816573,10.168479,2.52855,13.641117,-3.684422,-0.194214,11.215165,-4.228473,-3.843182,14.545358,-10.441445,-19.571709,14.128692,11.414085,17.171485,,,,,,,,,,,,,,-7.030931,3.942956,-41.824453,-41.938033,-7.610122,-4.554151,-38.931634,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.008,0.0001,13.593426,02-ts-rxt-c1,14.545358,20-ts-t3-t4,20-ts-t3-t4,-0.951932,13.804925,14.545358,20-ts-t3-t4,20-ts-t3-t4,-0.951932,12.975595,14.545358,20-ts-t3-t4,20-ts-t3-t4,-0.951932,12.739477,14.545358,20-ts-t3-t4,20-ts-t3-t4,-0.951932,13.593426
1,3aa-adbrettphos,0.0,5.198917,24.075032,17.953049,22.419034,-18.411758,-13.224764,-7.862067,-11.868716,-11.444519,23.207814,10.539023,19.88515,6.091235,15.698407,0.734186,9.961714,19.007892,9.604661,-4.973641,16.283246,0.380271,-3.183984,25.964463,13.109302,27.130376,,,,,,,,19.614694,-0.239709,21.413136,27.07641,4.129013,36.271935,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.252,0.0105,24.075032,02-ts-rxt-c1,19.88515,12-ts-rxt-t1,02-ts-rxt-c1,4.189881,21.597652,21.597652,17-ts-t2-t3-corr_1.0,02-ts-rxt-c1,2.477379,20.768323,20.768323,17-ts-t2-t3-corr_3.5,02-ts-rxt-c1,3.306709,20.532204,20.532204,17-ts-t2-t3-corr_5.0,02-ts-rxt-c1,3.542828,20.768323
2,3b-potol3,0.0,3.935112,14.91904,4.182351,6.22866,-29.067498,-25.125483,-19.312862,-24.096995,-22.073276,10.006267,-5.200799,9.754636,2.561494,14.426445,-1.357303,0.471887,11.380513,2.365084,-4.346131,13.934477,-10.508275,-20.33382,12.475518,13.019568,20.315622,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.647,0.0333,14.91904,02-ts-rxt-c1,14.426445,14-ts-t1-t2,02-ts-rxt-c1,0.492595,13.970274,14.426445,14-ts-t1-t2,02-ts-rxt-c1,0.492595,13.140944,14.426445,14-ts-t1-t2,02-ts-rxt-c1,0.492595,12.904825,14.426445,14-ts-t1-t2,02-ts-rxt-c1,0.492595,14.426445
3,3c-pipr3,0.0,2.965924,16.900401,4.392881,10.09004,-23.079488,-23.316059,-17.658433,-21.964403,-17.761344,9.766245,-4.101716,10.619031,2.85862,14.762476,2.529805,4.140622,15.574474,3.456636,-0.597075,14.495785,-9.966421,-16.636847,15.026658,15.03105,18.722061,,,,,,,,,,,,,,-0.839294,12.630199,-25.585761,-26.922984,-0.76713,11.103468,-25.9767,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.035,0.0131,16.900401,02-ts-rxt-c1,15.574474,17-ts-t2-t3,02-ts-rxt-c1,1.325928,18.164234,18.164234,17-ts-t2-t3-corr_1.0,17-ts-t2-t3-corr_1.0,-1.263833,17.334904,17.334904,17-ts-t2-t3-corr_3.5,17-ts-t2-t3-corr_3.5,-0.434503,17.098785,17.098785,17-ts-t2-t3-corr_5.0,17-ts-t2-t3-corr_5.0,-0.198384,16.900401
4,3d-pcpt3,0.0,0.861571,15.331314,5.10479,8.976524,-27.060722,-24.952918,-19.482289,-22.94175,-20.19765,9.820525,-4.56325,10.107924,1.374246,15.855912,-0.465612,3.128135,14.27396,1.664155,-1.004015,14.536886,-11.616457,-19.157867,15.521449,13.920045,17.943636,,,,,,,,,,,,,,-1.555596,10.359555,-30.782481,-29.797291,-2.621107,9.627879,-26.310848,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.04,0.0091,15.331314,02-ts-rxt-c1,15.855912,14-ts-t1-t2,14-ts-t1-t2,-0.524598,16.86372,16.86372,17-ts-t2-t3-corr_1.0,17-ts-t2-t3-corr_1.0,-1.532407,16.034391,16.034391,17-ts-t2-t3-corr_3.5,17-ts-t2-t3-corr_3.5,-0.703077,15.798272,15.855912,14-ts-t1-t2,14-ts-t1-t2,-0.524598,15.331314
5,3e-pcy3,0.0,2.253387,16.308346,5.909258,10.564123,-25.122345,-22.368833,-15.343864,-21.638412,-17.079555,10.753004,-1.081826,11.120097,4.476026,16.223005,1.716239,5.889805,16.741955,8.68034,1.838603,19.506135,-6.502254,-15.766805,15.997101,7.115331,23.058466,,,,,,,,,,,,,,-1.94026,11.625242,-24.371844,-28.139411,-0.33258,11.160885,-19.492957,-20.891048,,,,,,,,,,,,,,,,,,,,,,,,,,,-34.332617,-25.318756,-17.343109,-30.501357,-32.615437,-26.372344,-33.546034,-18.270568,-33.764407,-24.313485,-55.712809,-34.354266,-29.506127,-46.418137,0.0,6.335337,18.167029,11.118842,12.683223,-21.731284,-19.496722,-13.9715,-19.631009,-14.400089,13.43247,1.597639,2.532629,12.368213,4.90336,18.575538,8.965229,9.289024,23.410499,6.314629,4.518069,22.185601,-3.822788,,,,,,,,,,,,,,,0.036,0.028,16.308346,02-ts-rxt-c1,19.506135,20-ts-t3-t4,20-ts-t3-t4,-3.197789,19.331715,19.506135,20-ts-t3-t4,20-ts-t3-t4,-3.197789,18.502386,19.506135,20-ts-t3-t4,20-ts-t3-t4,-3.197789,18.266267,19.506135,20-ts-t3-t4,20-ts-t3-t4,-3.197789,16.308346
6,3f-pcy2tbu,0.0,-0.359563,12.221376,7.538272,12.198786,-26.325281,-23.637658,-18.175187,-22.151715,-19.776591,9.81676,0.673945,9.286514,2.154868,12.759152,-1.234939,3.338978,12.353781,2.539531,-1.771459,13.062239,-10.267311,-17.804956,17.765423,14.758397,19.05872,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.1,0.0255,12.221376,02-ts-rxt-c1,13.062239,20-ts-t3-t4,20-ts-t3-t4,-0.840863,14.943541,14.943541,17-ts-t2-t3-corr_1.0,17-ts-t2-t3-corr_1.0,-2.722165,14.114212,14.114212,17-ts-t2-t3-corr_3.5,17-ts-t2-t3-corr_3.5,-1.892835,13.878093,13.878093,17-ts-t2-t3-corr_5.0,17-ts-t2-t3-corr_5.0,-1.656716,12.221376
7,3g-ptbu2cy,0.0,2.685113,17.137914,10.540906,14.642936,-25.068379,-23.055956,-16.256263,-19.786631,-17.880885,14.570144,2.223894,11.074916,2.740962,14.007896,-1.154618,3.791413,15.399712,2.954943,-1.066139,14.133398,-9.989952,-11.615202,18.690372,15.656364,20.776842,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.322,0.0127,17.137914,02-ts-rxt-c1,15.399712,17-ts-t2-t3,02-ts-rxt-c1,1.738201,17.989472,17.989472,17-ts-t2-t3-corr_1.0,17-ts-t2-t3-corr_1.0,-0.851559,17.160143,17.160143,17-ts-t2-t3-corr_3.5,17-ts-t2-t3-corr_3.5,-0.022229,16.924024,16.924024,17-ts-t2-t3-corr_5.0,02-ts-rxt-c1,0.21389,17.137914
8,3h-ptbu3,0.0,2.30798,11.682973,11.270699,16.267558,-25.382762,-23.27182,-15.74547,-19.000989,-18.446899,15.476268,2.571534,11.733174,2.459837,13.182093,-2.353788,1.12638,12.267184,-1.850526,-3.496483,13.164523,-6.398087,-14.976771,24.650458,14.355536,20.193258,,,,,,,,,,,,,,,,,,,,,-21.211078,,,,,,,,,,,,,,,,,,,,,,,,,,,-30.165012,-21.1728,-13.337716,-28.924426,-32.287877,-26.142048,-31.376733,-20.523955,-35.477508,-31.249976,-61.544255,-42.012393,-39.994322,-59.398172,0.0,6.438248,13.794543,12.937992,20.432339,-23.340846,-19.066878,-14.353654,-18.084826,-15.767433,18.155734,5.251,3.372236,13.691631,3.322663,16.735052,1.541791,6.485939,15.205184,0.747364,-0.817017,15.843989,-3.718622,15.645696,14.354281,20.941249,-23.827793,-23.351513,-12.527601,-17.007392,21.890671,14.41013,27.535747,2.024346,6.812871,14.382519,5.823916,0.907,0.0186,16.267558,04-ts-c1-c2,13.182093,14-ts-t1-t2,04-ts-c1-c2,3.085465,14.856945,14.856945,17-ts-t2-t3-corr_1.0,04-ts-c1-c2,1.410613,14.027615,14.027615,17-ts-t2-t3-corr_3.5,04-ts-c1-c2,2.239943,13.791496,13.791496,17-ts-t2-t3-corr_5.0,04-ts-c1-c2,2.476062,14.027615
9,3i-pad3,0.0,1.326241,10.462153,8.022396,15.405673,-26.386463,-22.847309,-17.363503,-18.759085,-19.845931,15.82987,-5.429526,11.40781,2.574358,13.611624,-3.422123,0.491654,10.000934,1.107868,-2.269388,13.973069,-7.263737,-14.82711,23.54259,4.553523,22.564303,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.43,0.0466,15.405673,04-ts-c1-c2,13.973069,20-ts-t3-t4,04-ts-c1-c2,1.432604,12.590694,13.973069,20-ts-t3-t4,04-ts-c1-c2,1.432604,11.761364,13.973069,20-ts-t3-t4,04-ts-c1-c2,1.432604,11.525246,13.973069,20-ts-t3-t4,04-ts-c1-c2,1.432604,13.973069


## Base Catalysis

In [27]:
base_catalysis_naphboh3_dict = {"reference" : get_reactant_energy('naphboh3') + get_reactant_energy('naphboh2') + get_reactant_energy('h2o')}
base_catalysis_naphboh3_dict["[NaphB(OH)$_3$]$^-$"] = (get_reactant_energy('naphboh3') + get_reactant_energy('naphboh2') + get_reactant_energy('h2o') - base_catalysis_naphboh3_dict["reference"])*2625.5/4.184
base_catalysis_naphboh3_dict["TS0"] = (get_energy(base_catalysis_df, '00-ts-naphboh3-ts0') + get_reactant_energy('naphboh2') + get_reactant_energy('h2o') - base_catalysis_naphboh3_dict["reference"])*2625.5/4.184
base_catalysis_naphboh3_dict["Naph-H + [OB(OH)$_2$]$^-$"] = (get_reactant_energy('naph') + get_reactant_energy('oboh2') + get_reactant_energy('naphboh2') + get_reactant_energy('h2o') - base_catalysis_naphboh3_dict["reference"])*2625.5/4.184
base_catalysis_naphboh3_dict["Naph-H + [B(OH)$_4$]$^-$"] = (get_reactant_energy('naph') + get_reactant_energy('boh4') + get_reactant_energy('naphboh2') - base_catalysis_naphboh3_dict["reference"])*2625.5/4.184
base_catalysis_naphboh3_dict["[NaphB(OH)$_3$]$^-$ + H$_2$O"] = (get_energy(base_catalysis_df, '01-naphboh3-h2o') + get_reactant_energy('naphboh2') - base_catalysis_naphboh3_dict["reference"])*2625.5/4.184
base_catalysis_naphboh3_dict["TS1"] = (get_energy(base_catalysis_df, '02-ts-naphboh3-ts1') + get_reactant_energy('naphboh2') - base_catalysis_naphboh3_dict["reference"])*2625.5/4.184
base_catalysis_naphboh3_dict["Naph-H + B(OH)$_3$ + [NaphB(OH)$_3$]$^-$"] = (get_reactant_energy("naph") + get_reactant_energy('boh3') + get_reactant_energy('naphboh3') - base_catalysis_naphboh3_dict["reference"])*2625.5/4.184

base_catalysis_naphboh3_dict["Naph-H + [OB(OH)$_2$]$^-$ // Naph-H + [B(OH)$_4$]$^-$"] = max(base_catalysis_naphboh3_dict["Naph-H + [OB(OH)$_2$]$^-$"], base_catalysis_naphboh3_dict["Naph-H + [B(OH)$_4$]$^-$"])+diff_barrier
base_catalysis_naphboh3_dict["[NaphB(OH)$_3$]$^-$ // [NaphB(OH)$_3$]$^-$ + H$_2$O"] = max(base_catalysis_naphboh3_dict["[NaphB(OH)$_3$]$^-$"], base_catalysis_naphboh3_dict["[NaphB(OH)$_3$]$^-$ + H$_2$O"])+diff_barrier
base_catalysis_naphboh3_dict["Naph-H + [B(OH)$_4$]$^-$ // Naph-H + B(OH)$_3$ + [NaphB(OH)$_3$]$^-$"] = max(base_catalysis_naphboh3_dict["Naph-H + [B(OH)$_4$]$^-$"], base_catalysis_naphboh3_dict["Naph-H + B(OH)$_3$ + [NaphB(OH)$_3$]$^-$"])+diff_barrier

In [28]:
base_catalysis_naphbpin_dict = {"reference" : get_reactant_energy('naphbpinoh') + get_reactant_energy('naphbpin') + get_reactant_energy('h2o')}
base_catalysis_naphbpin_dict["[NaphBpin(OH)]$^-$"] = (get_reactant_energy('naphbpinoh') + get_reactant_energy('naphbpin') + get_reactant_energy('h2o') - base_catalysis_naphbpin_dict["reference"])*2625.5/4.184
base_catalysis_naphbpin_dict["TS0"] = (get_energy(base_catalysis_df, '10-ts-naphbpinoh-ts0') + get_reactant_energy('naphbpin') + get_reactant_energy('h2o') - base_catalysis_naphbpin_dict["reference"])*2625.5/4.184
base_catalysis_naphbpin_dict["Naph-H + [OBpin]$^-$"] = (get_reactant_energy('naph') + get_reactant_energy('obpin') + get_reactant_energy('naphbpin') + get_reactant_energy('h2o') - base_catalysis_naphbpin_dict["reference"])*2625.5/4.184
base_catalysis_naphbpin_dict["Naph-H + [Bpin(OH)$_2$]$^-$"] = (get_reactant_energy('naph') + get_reactant_energy('bpinoh2') + get_reactant_energy('naphbpin') - base_catalysis_naphbpin_dict["reference"])*2625.5/4.184
base_catalysis_naphbpin_dict["[NaphBpin(OH)]$^-$ + H$_2$O"] = (get_energy(base_catalysis_df, '11-naphbpinoh-h2o') + get_reactant_energy('naphbpin') - base_catalysis_naphbpin_dict["reference"])*2625.5/4.184
base_catalysis_naphbpin_dict["TS1"] = (get_energy(base_catalysis_df, '12-ts-naphbpinoh-ts1') + get_reactant_energy('naphbpin') - base_catalysis_naphbpin_dict["reference"])*2625.5/4.184
base_catalysis_naphbpin_dict["Naph-H + Bpin(OH) + [NaphBpin(OH)]$^-$"] = (get_reactant_energy("naph") + get_reactant_energy('bpinoh') + get_reactant_energy('naphbpinoh') - base_catalysis_naphbpin_dict["reference"])*2625.5/4.184


base_catalysis_naphbpin_dict["Naph-H + [OBpin]$^-$ // Naph-H + [Bpin(OH)$_2$]$^-$"] = max(base_catalysis_naphbpin_dict["Naph-H + [OBpin]$^-$"], base_catalysis_naphbpin_dict["Naph-H + [Bpin(OH)$_2$]$^-$"])+diff_barrier
base_catalysis_naphbpin_dict["[NaphBpin(OH)]$^-$ // [NaphBpin(OH)]$^-$ + H$_2$O"] = max(base_catalysis_naphbpin_dict["[NaphBpin(OH)]$^-$"], base_catalysis_naphbpin_dict["[NaphBpin(OH)]$^-$ + H$_2$O"])+diff_barrier
base_catalysis_naphbpin_dict["Naph-H + [Bpin(OH)$_2$]$^-$ // Naph-H + Bpin(OH) + [NaphBpin(OH)]$^-$"] = max(base_catalysis_naphbpin_dict["Naph-H + [Bpin(OH)$_2$]$^-$"], base_catalysis_naphbpin_dict["Naph-H + Bpin(OH) + [NaphBpin(OH)]$^-$"])+diff_barrier

In [29]:
for type_name in ['naphboh3', 'naphbpin']:
        os.makedirs(f"microkinetics/basecat-{type_name}", exist_ok=True)
        template = pd.read_csv(f"templates/rpd_template_basecat_{type_name}.csv")
        if type_name == 'naphboh3':
            nice_name = "NaphB(OH)$_2$"
            template['reference_gibbs'] = template.apply(lambda row: base_catalysis_naphboh3_dict[row['label_name']], axis=1)
        else:
            nice_name = "NaphBpin"
            template['reference_gibbs'] = template.apply(lambda row: base_catalysis_naphbpin_dict[row['label_name']], axis=1)
        template.to_csv(f"microkinetics/basecat-{type_name}/basecat-{type_name}-plot.csv", index=False)

        fig, ax = plt.subplots(figsize=(6,3), dpi=300)
        for line in set(template['line'].values):
            curr_line_df = template[template['line'] == line].copy().sort_values(by='point').reset_index(drop=True)
            x_solid, y_solid = interpolate(curr_line_df, line=line)

            for idx, point in curr_line_df.iterrows():
                if point['is_point'] == True:
                    ax.scatter(point['point'], point['reference_gibbs'], color=point['point_color'], marker=point['point_marker'], s=10)
                if point['is_TS'] == True:
                    # Locate triangle marker index
                    idx_0 = np.where(x_solid >= curr_line_df.at[idx-1, 'point'])[0][0]
                    idx_1 = np.where(x_solid >= point['point'])[0][0]
                    idx_2 = np.where(x_solid >= curr_line_df.at[idx+1, 'point'])[0][0]
                    ax.plot(x_solid[idx_0:idx_2], y_solid[idx_0:idx_2], color=curr_line_df['line_color'].values[0], 
                            zorder=-1, linewidth=1, linestyle='--' if point['point_marker'] == 'x' else '-')
                
                if point['is_label'] == True:
                    label_text = f"{point['label_name']}" f"\n{point['reference_gibbs']:.1f}"

                    ax.annotate(label_text, xy=(point['point'], point['reference_gibbs']), 
                                ha='center', color=point['point_color'], va=point['vertical_alignment'], 
                                fontsize=5, xytext=(point['point'], point['reference_gibbs']+point['vertical_offset']))
                
            
                    
        ax.spines[['right', 'top', 'bottom']].set_visible(False)           
        ax.set_yticklabels([])
        ax.set_xticklabels([])
        plt.tick_params(left = False,bottom=False) 
        ax.set_xlim(left=-0.5)
        ax.plot(-0.5, 1, "^k", transform=ax.get_xaxis_transform(), clip_on=False, markersize=2)
        ax.set_ylabel("$\Delta G$ / $kcal$ $mol^{-1}$", size=8)
        ax.set_ylim(-30, 35)
        plt.title(f"Base-catalyzed {nice_name}", size=8)
        plt.savefig(f"microkinetics/basecat-{type_name}/basecat-{type_name}-plot.png", facecolor='white', transparent=False, bbox_inches='tight')
        plt.close()

## Unligated Catalysis

In [30]:
def equalize_unlig_reference(row):
    if row['structure'] in ['00-murxt']:
        return row['qh-G(T)_SPC']/2 + get_energy(base_reactants_df, 'naphboh2') + get_energy(base_reactants_df, 'h2o')
    elif row['structure'] in ['01-rxt', '02-ts-rxt-int1', '03-int1', '04-ts-int1-int2', '05-int2',
                              '12-ts-int2-int5c', '13-int5c', '14-ts-int5c-int6c', '15-int6c',
                              '22-ts-int2-int5t', '23-int5t', '24-ts-int5t-int6t', '25-int6t',
                              '33-ts-int6c-re',
                            ]:
        return row['qh-G(T)_SPC'] + get_energy(base_reactants_df, 'h2o')
    
    elif row['structure'] in ['06-int2-h2o', '07-ts-int2-int3', '08-int3-boh3',
                              '16-int6c-h2o', '17-ts-int6c-int7c', '18-int7c-boh3',
                            '26-int6t-h2o', '27-ts-int6t-int7t', '28-int7t-boh3',
                            ]:
        return row['qh-G(T)_SPC']
    elif row['structure'] in ['09-int3', '10-ts-int3-int4', '11-int4',
                                '19-int7c', '20-ts-int7c-int8c', '21-int8c',
                                '29-int7t', '30-ts-int7t-int8t', '31-int8t',
                            ]:
        return row['qh-G(T)_SPC'] + get_energy(base_reactants_df, 'boh3')
    elif row['structure'] in ['32-rmurxt']:
        return row['qh-G(T)_SPC']/2 + get_energy(base_reactants_df, 'naph') + get_energy(base_reactants_df, 'boh3')
    else:
        print(f"Messed up {row}")

In [31]:
unlig_df['equalized_gibbs'] = unlig_df.apply(lambda row: equalize_unlig_reference(row), axis=1)
unlig_df['reference_gibbs'] = unlig_df.apply(lambda row: (row['equalized_gibbs'] - unlig_df.loc[unlig_df['structure'] == '00-murxt', 'equalized_gibbs'])*2625.5/4.184, axis=1)

In [32]:
base_diffusion_consideration_dict = {"INT2 // H$_2$O": ["INT2", "INT2 + H$_2$O"],
                       "INT3 // B(OH)$_3$": ["INT3 + B(OH)$_3$", "INT3"],
                       "INT4-PDT": ['INT4', 'PDT'],
                       "Pd-RXT": ["$\mu$-RXT", "RXT",],
                       "INT6C // H$_2$O": ["INT6C", "INT6C + H$_2$O"],
                       "INT7C // B(OH)$_3$": ["INT7C + B(OH)$_3$", "INT7C"],
                       "INT8C-R$\mu$-RXT": ['INT8C', "R$\mu$-RXT"],
                       "INT6T // H$_2$O": ["INT6T", "INT6T + H$_2$O"],
                       "INT7T // B(OH)$_3$": ["INT7T + B(OH)$_3$", "INT7T"],
                       "INT8T-R$\mu$-RXT": ['INT8T', "R$\mu$-RXT"],                       
                        }

pdt_energy = (get_reactant_energy('naph') + get_reactant_energy('boh3') - get_reactant_energy('naphboh2') - get_reactant_energy('h2o'))*2625.5/4.184
re_pdt_energy = (get_reactant_energy('binaph') + get_reactant_energy('pdboh32') - get_reactant_energy('naphboh2') - 0.5*get_energy(unlig_df, '00-murxt'))*2625.5/4.184

diffusion_consideration_dict = dict(base_diffusion_consideration_dict)
os.makedirs(f"microkinetics/unlig-pdoh2", exist_ok=True)
current_df = unlig_df.copy()
current_df['label_name'] = current_df.apply(lambda row: nice_structure_names[row['structure']], axis=1)
current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'pdt', 'reference_gibbs': pdt_energy, 'label_name': 'PDT'}])], ignore_index=True)
current_df = pd.concat([current_df, pd.DataFrame([{'structure': 're-pdt', 'reference_gibbs': re_pdt_energy, 'label_name': 'RE-PDT'}])], ignore_index=True)
for diffusion, consideration in diffusion_consideration_dict.items():
    energy = max(get_reference_gibbs_energy(current_df, consideration[0]), get_reference_gibbs_energy(current_df, consideration[1]))+diff_barrier
    current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'diffusion', 'reference_gibbs': energy, 'label_name': diffusion}])], ignore_index=True)  
try:
    template = pd.read_csv(f"microkinetics/unlig-pdoh2/unlig-pdoh2-plot.csv")
    template = pd.merge(template, current_df[['label_name', 'reference_gibbs']], on='label_name', how='left', suffixes=('_old', ""))
    template = template.drop('reference_gibbs_old', axis=1)
    template.sort_values(by=['line', 'point'], ascending=[True, True], inplace=True)
    template.to_csv(f"microkinetics/unlig-pdoh2/unlig-pdoh2-plot.csv", index=False)
except FileNotFoundError:
    template = pd.read_csv(f"templates/rpd_template_unlig-pdoh2.csv")
    template = pd.merge(template, current_df, on='label_name')
    template.to_csv(f"microkinetics/unlig-pdoh2/unlig-pdoh2-plot.csv", index=False)

fig, ax = plt.subplots(figsize=(6,3), dpi=300)
for line in set(template['line'].values):
    curr_line_df = template[template['line'] == line].copy().sort_values(by='point').reset_index(drop=True)
    x_solid, y_solid = interpolate(curr_line_df, line=line)

    for idx, point in curr_line_df.iterrows():
        if point['is_point'] == True:
            ax.scatter(point['point'], point['reference_gibbs'], color=point['point_color'], marker=point['point_marker'], s=10)
        if point['is_TS'] == True:
            # Locate triangle marker index
            idx_0 = np.where(x_solid >= curr_line_df.at[idx-1, 'point'])[0][0]
            idx_1 = np.where(x_solid >= point['point'])[0][0]
            idx_2 = np.where(x_solid >= curr_line_df.at[idx+1, 'point'])[0][0]
            ax.plot(x_solid[idx_0:idx_2], y_solid[idx_0:idx_2], color=curr_line_df['line_color'].values[0], 
                    zorder=-1, linewidth=1, linestyle='--' if point['point_marker'] == 'x' or point['point_marker'] == 'v' else '-')
        
        if point['is_label'] == True:
            label_text = f"{point['label_name']}" f"\n{point['reference_gibbs']:.1f}"

            ax.annotate(label_text, xy=(point['point'], point['reference_gibbs']), 
                        ha='center', color=point['point_color'], va=point['vertical_alignment'], 
                        fontsize=5, xytext=(point['point'], point['reference_gibbs']+point['vertical_offset']))
        
    
            
ax.spines[['right', 'top', 'bottom']].set_visible(False)           
ax.set_yticklabels([])
ax.set_xticklabels([])
plt.tick_params(left = False,bottom=False) 
ax.set_xlim(left=-1)
ax.plot(-1, 1, "^k", transform=ax.get_xaxis_transform(), clip_on=False, markersize=2)
ax.set_ylabel("$\Delta G$ / $kcal$ $mol^{-1}$", size=8)
ax.set_ylim(-45, 30)
plt.title(f"Unligated Pd(OH)$_2$", size=8)
plt.savefig(f"microkinetics/unlig-pdoh2/unlig-pdoh2-plot.png", facecolor='white', transparent=False, bbox_inches='tight')
plt.close()

## Supplementary Discussion 6.1: Palladium reference states

### Table S25

In [33]:
def get_energy_group_specific(df, ligand, structure_name):
    return df[(df['group'] == ligand) & (df['structure'] == structure_name)]['qh-G(T)_SPC'].values[0]

def construct_reference_state_table(alternative_structure_names, df, ligand, base_reactants_df):
    # Build energy dict
    energies = {name: get_energy_group_specific(df, ligand, name) for name in alternative_structure_names}
    for _, row in base_reactants_df.iterrows():
        energies[row['structure']] = row['qh-G(T)_SPC']

    d = {
        'RXT → RXT': energies['01-rxt'] - energies['01-rxt'],
        '½ μA-Ref + RB(OH)$_2$ → RXT': energies['01-rxt'] - 0.5*energies['00-lpdoh2'] - energies['naphboh2'],
        '½ μB-Ref + RB(OH)$_2$ → RXT': energies['01-rxt'] - 0.5*energies['46-ub-ref'] - energies['naphboh2'],
        '½ OAcμ-Ref + 2 H$_2$O + RB(OH)$_2$ → RXT + 2 AcOH': energies['01-rxt'] + 2*energies['acoh'] - 0.5*energies['47-oacu-ref'] - 2*energies['h2o'] - energies['naphboh2'],
        'C-Ref + RB(OH)$_2$ → RXT': energies['01-rxt'] - energies['48-c-ref'] - energies['naphboh2'],
        'T-Ref + RB(OH)$_2$ → RXT': energies['01-rxt'] - energies['49-t-ref'] - energies['naphboh2'],
        'CL-Ref + RB(OH)$_2$ → RXT + L': energies['01-rxt'] + energies['99-lig'] - energies['50-cl-ref'] - energies['naphboh2'],
        'TL-Ref + RB(OH)$_2$ → RXT + L': energies['01-rxt'] + energies['99-lig'] - energies['51-tl-ref'] - energies['naphboh2'],
        'CS-Ref + RB(OH)$_2$ → RXT + S': energies['01-rxt'] + energies['14dioxane'] - energies['52-cs-ref'] - energies['naphboh2'],
        'TS-Ref + RB(OH)$_2$ → RXT + S': energies['01-rxt'] + energies['14dioxane'] - energies['53-ts-ref'] - energies['naphboh2'],
        'CW-Ref + RB(OH)$_2$ → RXT + H$_2$O': energies['01-rxt'] + energies['h2o'] - energies['54-cw-ref'] - energies['naphboh2'],
        'TW-Ref + RB(OH)$_2$ → RXT + H$_2$O': energies['01-rxt'] + energies['h2o'] - energies['55-tw-ref'] - energies['naphboh2'],
        'OH-Ref + RB(OH)$_2$ → RXT + $^-$OH': energies['01-rxt'] + energies['oh0dioxane'] - energies['56-oh-ref'] - energies['naphboh2'],
        'KOH-Ref + RB(OH)$_2$ → RXT + KOH': energies['01-rxt'] + energies['koh'] - energies['57-koh-ref'] - energies['naphboh2'],
        'KOH-Ref + K$_2$HPO4$^-$ + RB(OH)$_2$ → RXT + [K$_3$PO$_4$--H$_2$O]': energies['01-rxt'] + energies['k3po4h2o'] - energies['57-koh-ref'] - energies['k2hpo4'] - energies['naphboh2'],
        'KOH-Ref + K$_2$HPO4$^-$ + RB(OH)$_2$ → RXT + K$_3$PO$_4$ + H$_2$O': energies['01-rxt'] + energies['k3po4'] + energies['h2o'] - energies['57-koh-ref'] - energies['k2hpo4'] - energies['naphboh2'],
        'Pd(OAc)$_2$-Ref + 2 H$_2$O + L + RB(OH)$_2$ → RXT + 2 AcOH': energies['01-rxt'] + 2*energies['acoh'] - energies['pdoac2'] - 2*energies['h2o'] - energies['99-lig'] - energies['naphboh2'],
        '½ Pd$_2$(OAc)$_4$-Ref + 2 H$_2$O + L + RB(OH)$_2$ → RXT + 2 AcOH': energies['01-rxt'] + 2*energies['acoh'] - 0.5*energies['pd2oac4'] - 2*energies['h2o'] - energies['99-lig']  - energies['naphboh2'],
        '⅓ Pd$_3$(OAc)$_6$-Ref + 2 H$_2$O + L + RB(OH)$_2$ → RXT + 2 AcOH': energies['01-rxt'] + 2*energies['acoh']  - 1/3*energies['pd3oac6'] - 2*energies['h2o'],
        '¼ Pd$_4$(OAc)$_8$-Ref + 2 H$_2$O + L + RB(OH)$_2$ → RXT + 2 AcOH': energies['01-rxt'] + 2*energies['acoh']  - 0.25*energies['pd4oac8'] - 2*energies['h2o'],   
    }


    
    for key in d.keys():
        d[key] = d[key]*2625.5/4.184
    return d


In [34]:
alternative_structure_names = ['01-rxt', '00-lpdoh2'] + list(nice_alternative_names.keys()) + ['99-lig']
alternative_structure_names.remove('58-c2alt')

In [35]:
alternative_structure_names

['01-rxt',
 '00-lpdoh2',
 '46-ub-ref',
 '47-oacu-ref',
 '48-c-ref',
 '49-t-ref',
 '50-cl-ref',
 '51-tl-ref',
 '52-cs-ref',
 '53-ts-ref',
 '54-cw-ref',
 '55-tw-ref',
 '56-oh-ref',
 '57-koh-ref',
 '85-c2dim',
 '99-lig']

In [36]:
reference_dict = {}
for ligand in ['3e-pcy3', '3m-cyjohnphos', '3h-ptbu3', '3t-tbujohnphos']:
    reference_dict[ligand] = construct_reference_state_table(alternative_structure_names, df, ligand, base_reactants_df)

In [37]:
reference_df = pd.DataFrame(reference_dict)
reference_df.to_csv("supplementary_table_s25.csv")

### Supplementary Table S26

In [38]:
def get_oh_dioxane_energy(oh_dioxane_df, energy_type, structure):
    return oh_dioxane_df.loc[(oh_dioxane_df['energy_type'] == energy_type) & (oh_dioxane_df['structure'] == structure)]['qh-G(T)'].values[0]

In [39]:
oh_dioxane_df = pd.read_csv("supplementary_table_s26.csv", skiprows=23)
oh_dioxane_df = oh_dioxane_df.tail(-1)
oh_dioxane_df = oh_dioxane_df.head(-1)
oh_dioxane_df = oh_dioxane_df[['   Structure', 'qh-G(T)', 'im']]
oh_dioxane_df.rename(columns={'   Structure': 'filename'}, inplace=True)
oh_dioxane_df['filename'] = oh_dioxane_df.apply(lambda row: row['filename'].split()[1], axis=1)
oh_dioxane_df['structure'] = oh_dioxane_df.apply(lambda row: row['filename'].split('-')[2], axis=1)
oh_dioxane_df['energy_type'] = oh_dioxane_df.apply(lambda row: row['filename'].split('-')[3], axis=1)

In [40]:
d = {}
for energy_type in oh_dioxane_df['energy_type']:
    d[energy_type] = {}
    reference_energy = 2*get_oh_dioxane_energy(oh_dioxane_df, energy_type, "14dioxane") + get_oh_dioxane_energy(oh_dioxane_df, energy_type, "oh0dioxane")
    for dioxane_num in ["0", "1", "2"]:
        d[energy_type][f'[$^-$OH -- {2-int(dioxane_num)} dioxane] + {dioxane_num}dioxane'] = ((2-int(dioxane_num))*get_oh_dioxane_energy(oh_dioxane_df, energy_type, "14dioxane") + get_oh_dioxane_energy(oh_dioxane_df, energy_type, f"oh{dioxane_num}dioxane") - reference_energy)*2625.5/4.184

In [41]:
oh_dioxane_df_results = pd.DataFrame(d).T
oh_dioxane_df_results

Unnamed: 0,[$^-$OH -- 2 dioxane] + 0dioxane,[$^-$OH -- 1 dioxane] + 1dioxane,[$^-$OH -- 0 dioxane] + 2dioxane
gassvp,0.0,-43.657723,-59.137128
smdqzvpp,0.0,5.045177,9.081946


### Data for Supplementary Figure S62-S65 and Table S27

In [42]:
def construct_pd3oac6_table(alternative_structure_names, df, ligand_list, base_reactants_df):
    
    pd3oac6_dict = {}
    for ligand in ligand_list:
        pd3oac6_dict[ligand] = {}
        energies = {name: get_energy_group_specific(df, ligand, name) for name in alternative_structure_names}
        for _, row in base_reactants_df.iterrows():
            energies[row['structure']] = row['qh-G(T)_SPC']
        # Raw energy
        pd3oac6_dict[ligand]['raw'] = 1/3*energies['pd3oac6'] + 2*energies['h2o'] + energies['99-lig'] - 2*energies['acoh'] 
        # Further reference equalization
        pd3oac6_dict[ligand]['ref'] = pd3oac6_dict[ligand]['raw'] + 2*energies['naphboh2'] + 2*energies['h2o'] + energies['99-lig']

    return pd3oac6_dict

In [43]:
ligand_list = structure_dict['pd3oac6']
pd3oac6_dict = construct_pd3oac6_table(alternative_structure_names, df, ligand_list, base_reactants_df)

In [44]:
pd3oac6_main_df = main_df[main_df['group'].isin(ligand_list)].copy()
pd3oac6_main_df['equalized_gibbs'] = pd3oac6_main_df.apply(lambda row: equalize_reference(row, row['group']), axis=1)
pd3oac6_main_df['reference_gibbs'] = pd3oac6_main_df.apply(lambda row: (row['equalized_gibbs'] - pd3oac6_dict[row['group']]['ref'])*2625.5/4.184, axis=1)

In [45]:
diff_barrier = 3.4663630747609946
type_list = ['pd3oac6']

for type_name in type_list:
    int_list, ts_list, rxt_pdt_list, diffusion_ts_list, xform_dict = get_main_microkinetics_transformations(type_name='main') # Follows main reaction pathway

    for ligand in ligand_list:
        os.makedirs(f"microkinetics/{type_name}-{ligand}", exist_ok=True)
        print(f"{type_name}-{ligand}")
        current_df = pd3oac6_main_df[pd3oac6_main_df['group'] == ligand][['structure', 'qh-G(T)_SPC']].copy()
        current_df = pd.concat([current_df, pd.DataFrame([{'structure': '100-l2pd2oh4', 'qh-G(T)_SPC': current_df[current_df['structure'] == '00-lpdoh2']['qh-G(T)_SPC'].values[0]}])], ignore_index=True)
        current_df.loc[current_df['structure'] == '00-lpdoh2', 'qh-G(T)_SPC'] = pd3oac6_dict[ligand]['raw'] # Spoofing 00-lpdoh2 energies with pd3oac6-ref data
        int_df = current_df[(~current_df['structure'].str.contains('ts')) & (current_df['structure'].isin(int_list))].copy()
        
        for rxt in rxt_pdt_list:
            int_df = pd.concat([int_df, pd.DataFrame([{'structure': rxt, 'qh-G(T)_SPC': get_reactant_energy(rxt)}])], ignore_index=True)
        

        int_df.to_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-intermediates.csv", index=False)

        xform_df = pd.DataFrame(columns=['I1', 'I2', 'P1', 'P2', 'Energy', 'Backwards', 'Flag', 'Ref_L', 'Ref_R'])
        xform_df["Backwards"] = xform_df["Backwards"].astype(bool)

        for ts, xform in xform_dict.items():
            energy_l, energy_r = get_left_right_barrier_reference_energies(int_df, xform)

            if ts in diffusion_ts_list:
                ts_energy = max(energy_l, energy_r)
            else:
                ts_energy = get_energy(current_df, ts)

            xform_df = pd.concat([xform_df, pd.DataFrame([{'I1': xform[0], 'I2': xform[1], 'P1': xform[2], 'P2': xform[3], 
                                                        'Energy': ts_energy, 'Backwards': xform[4], 'Flag': ts, 
                                                        'Ref_L': (ts_energy-energy_l)*2625.5/4.184, 
                                                        'Ref_R': (ts_energy-energy_r)*2625.5/4.184,}],
                                                        )])

        xform_df.to_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-transformations.csv", index=False)    


pd3oac6-3e-pcy3
pd3oac6-3h-ptbu3
pd3oac6-3m-cyjohnphos
pd3oac6-3t-tbujohnphos


  xform_df = pd.concat([xform_df, pd.DataFrame([{'I1': xform[0], 'I2': xform[1], 'P1': xform[2], 'P2': xform[3],
  xform_df = pd.concat([xform_df, pd.DataFrame([{'I1': xform[0], 'I2': xform[1], 'P1': xform[2], 'P2': xform[3],
  xform_df = pd.concat([xform_df, pd.DataFrame([{'I1': xform[0], 'I2': xform[1], 'P1': xform[2], 'P2': xform[3],
  xform_df = pd.concat([xform_df, pd.DataFrame([{'I1': xform[0], 'I2': xform[1], 'P1': xform[2], 'P2': xform[3],


In [46]:
diffusion_consideration_dict = {"C2 // H$_2$O": ["C2", "C2 + H$_2$O"],
                       "C3 // B(OH)$_3$": ["C3 + B(OH)$_3$", "C3"],
                       "C4-PDT": ['C4', 'PDT'],
                       "T2 // H$_2$O": ["T2", "T2 + H$_2$O"],
                       "T3 // B(OH)$_3$": ["T3 + B(OH)$_3$", "T3"],
                       'T4-PDT': ["T4", "PDT"],
                       "Pd-RXT": ["Pd$_3$(OAc)$_6$ + L", "RXT",],
                        }

pdt_energy = (get_reactant_energy('naph') + get_reactant_energy('boh3') - get_reactant_energy('naphboh2') - get_reactant_energy('h2o'))*2625.5/4.184

for type_name in type_list:

    for ligand in ligand_list:
        print(f"{type_name}-{ligand}")
        os.makedirs(f"microkinetics/{type_name}-{ligand}", exist_ok=True)
        current_df = pd3oac6_main_df[pd3oac6_main_df['group'] == ligand][['structure', 'reference_gibbs']].copy()
        current_df['label_name'] = current_df.apply(lambda row: nice_structure_names[row['structure']], axis=1)
        current_df.loc[current_df['structure'] == '00-lpdoh2', 'reference_gibbs'] = 0 # Spoof reference number
        current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'pd3oac6', 'reference_gibbs': 0.0, 'label_name': 'Pd$_3$(OAc)$_6$ + L'}])], ignore_index=True)
        current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'pdt', 'reference_gibbs': pdt_energy, 'label_name': 'PDT'}])], ignore_index=True)
        for diffusion, consideration in diffusion_consideration_dict.items():
            #print(consideration)
            energy = max(get_reference_gibbs_energy(current_df, consideration[0]), get_reference_gibbs_energy(current_df, consideration[1]))+diff_barrier
            current_df = pd.concat([current_df, pd.DataFrame([{'structure': 'diffusion', 'reference_gibbs': energy, 'label_name': diffusion}])], ignore_index=True)
            
        try:
            template = pd.read_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot.csv")
            template = pd.merge(template, current_df[['label_name', 'reference_gibbs']], on='label_name', how='left', suffixes=('_old', ""))
            template = template.drop('reference_gibbs_old', axis=1)
            template.to_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot.csv", index=False)
        except FileNotFoundError:
            template = pd.read_csv(f"templates/rpd_template_main.csv")
            template = pd.merge(template, current_df, on='label_name')
            template.to_csv(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot.csv", index=False)

        fig, ax = plt.subplots(figsize=(6,3), dpi=300)
        for line in set(template['line'].values):
            curr_line_df = template[template['line'] == line].copy().sort_values(by='point').reset_index(drop=True)
            x_solid, y_solid = interpolate(curr_line_df, line=line)

            for idx, point in curr_line_df.iterrows():
                if point['is_point'] == True:
                    ax.scatter(point['point'], point['reference_gibbs'], color=point['point_color'], marker=point['point_marker'], s=10)
                if point['is_TS'] == True:
                    # Locate triangle marker index
                    idx_0 = np.where(x_solid >= curr_line_df.at[idx-1, 'point'])[0][0]
                    idx_1 = np.where(x_solid >= point['point'])[0][0]
                    idx_2 = np.where(x_solid >= curr_line_df.at[idx+1, 'point'])[0][0]
                    ax.plot(x_solid[idx_0:idx_2], y_solid[idx_0:idx_2], color=curr_line_df['line_color'].values[0], 
                            zorder=-1, linewidth=1, linestyle='--' if point['point_marker'] == 'x' else '-')
                
                if point['is_label'] == True:
                    label_text = f"{point['label_name']}" f"\n{point['reference_gibbs']:.1f}"

                    ax.annotate(label_text, xy=(point['point'], point['reference_gibbs']), 
                                ha='center', color=point['point_color'], va=point['vertical_alignment'], 
                                fontsize=5, xytext=(point['point'], point['reference_gibbs']+point['vertical_offset']))
                
            
                    
        ax.spines[['right', 'top', 'bottom']].set_visible(False)           
        ax.set_yticklabels([])
        ax.set_xticklabels([])
        plt.tick_params(left = False,bottom=False) 
        ax.set_xlim(left=-1.5)
        ax.plot(-1.5, 1, "^k", transform=ax.get_xaxis_transform(), clip_on=False, markersize=2)
        ax.set_ylabel("$\Delta G$ / $kcal$ $mol^{-1}$", size=8)
        ax.set_ylim(-45, 30)
        plt.title(f"L = {proper_names_dict[ligand]}", size=8)
        plt.savefig(f"microkinetics/{type_name}-{ligand}/{type_name}-{ligand}-plot.png", facecolor='white', transparent=False, bbox_inches='tight')
        plt.close()

pd3oac6-3e-pcy3
pd3oac6-3h-ptbu3
pd3oac6-3m-cyjohnphos
pd3oac6-3t-tbujohnphos


## Supplementary Discussion S7.3

In [47]:
main_df[((main_df['structure'] == '58-c2alt') | (main_df['structure'] == '05-c2') | (main_df['structure'] == '85-c2dim')) & (main_df['group'].isin(ligand_list))]

Unnamed: 0,filename,qh-G(T)_SPC,im,group,structure,equalized_gibbs,reference_gibbs
166,3e-pcy3-05-c2,-1886.169783,C1,3e-pcy3,05-c2,-3645.675186,-25.122345
206,3e-pcy3-58-c2alt,-1886.16304,C1,3e-pcy3,58-c2alt,-3645.668443,-20.891048
207,3e-pcy3-85-c2dim,-3267.824107,C2,3e-pcy3,85-c2dim,-3645.689863,-34.332617
281,3h-ptbu3-05-c2,-1654.248268,C1,3h-ptbu3,05-c2,-3181.844737,-25.382762
314,3h-ptbu3-58-c2alt,-1654.24162,C1,3h-ptbu3,58-c2alt,-3181.838089,-21.211078
315,3h-ptbu3-85-c2dim,-2803.966964,C2,3h-ptbu3,85-c2dim,-3181.852358,-30.165012
456,3m-cyjohnphos-05-c2,-2113.320367,C1,3m-cyjohnphos,05-c2,-4099.980046,-26.712768
502,3m-cyjohnphos-58-c2alt,-2113.319029,C1,3m-cyjohnphos,58-c2alt,-4099.978708,-25.87316
503,3m-cyjohnphos-85-c2dim,-3722.129429,C1,3m-cyjohnphos,85-c2dim,-4099.996801,-37.226377
684,3t-tbujohnphos-05-c2,-1958.710004,C1,3t-tbujohnphos,05-c2,-3790.769366,-24.496404


## Supplementary Discussion S7.4

In [48]:
sd_74_structures = ['05-c2', '06-c2-h2o', '07-ts-c2-c3', '10-ts-c3-c4', '22-c5', '23-ts-c5-c6', '15-t2', '16-t2-h2o', '17-ts-t2-t3', '20-ts-t3-t4', '24-t5', '25-ts-t5-t6']

sd_74_df = main_df[(main_df['structure'].isin(sd_74_structures)) & (main_df['group'].isin(structure_dict['altpdb']))].copy()
sd_74_df = sd_74_df.pivot(index='group', columns='structure', values='reference_gibbs')
sd_74_df = sd_74_df[sd_74_structures]

sd_74_df.to_csv('supplementary_table_s29.csv')

## Supplementary Table S36

In [49]:
for water, ts in zip([1,2], ['82-ts-t2-t3-4mps', '83-ts-t2-t3-6mps']):
    for ligand in ['3w-tbuxphos', '3x-tbubrettphos', '3y-me4tbuxphos']:
        print(ligand, ts, (get_energy_group_specific(df, ligand, ts) - get_energy_group_specific(df, ligand, '15-t2') - get_reactant_energy('h2o')*water)*2625.5/4.184)

3w-tbuxphos 82-ts-t2-t3-4mps 41.14015427836471
3x-tbubrettphos 82-ts-t2-t3-4mps 41.253733508408146
3y-me4tbuxphos 82-ts-t2-t3-4mps 41.2462033937044
3w-tbuxphos 83-ts-t2-t3-6mps 43.34773291118928
3x-tbubrettphos 83-ts-t2-t3-6mps 45.425417064785755
3y-me4tbuxphos 83-ts-t2-t3-6mps 43.11555437377719


## Supplementary Table S37

In [50]:
tunnelling_ligands = ['3x-tbubrettphos', '3y-me4tbuxphos']
tunnelling_structures = np.array([['15-t2', '25-ts-t5-t6', '26-t6']])
tunnelling_df = main_df[(main_df['group'].isin(tunnelling_ligands)) & (main_df['structure'].isin(tunnelling_structures.flatten()))].copy()

In [51]:
for group in tunnelling_ligands:
    tmp = tunnelling_df[tunnelling_df['group'] == group].copy()
    for structures in tunnelling_structures:
        print(group, structures[1])
        im = tmp[tmp['structure'] == structures[1]]['im'].values[0]
        E0_reac = tmp[tmp['structure'] == structures[0]]['qh-G(T)_SPC'].values[0] + get_reactant_energy('h2o')
        E0_TS = tmp[tmp['structure'] == structures[1]]['qh-G(T)_SPC'].values[0]
        E0_prod = tmp[tmp['structure'] == structures[2]]['qh-G(T)_SPC'].values[0] 
        T = 333.15
        %run tunnelling.py {im} {E0_reac} {E0_TS} {E0_prod} {T}

3x-tbubrettphos 25-ts-t5-t6
Wigner Kappa is 1.4653688771073825
Eckart Kappa is 1.6756443790237958
Uncorrected barrier is 31.329042303818643
Wigner Corrected barrier is 31.07607300601345
Eckart Corrected barrier is 30.9872998274862
3y-me4tbuxphos 25-ts-t5-t6
Wigner Kappa is 1.5992426599839689
Eckart Kappa is 1.9773945747460904
Uncorrected barrier is 31.75888635284601
Wigner Corrected barrier is 31.448039604158176
Eckart Corrected barrier is 31.307522119604055


<Figure size 640x480 with 0 Axes>