In [1]:
import os
import sys
sys.path.append(os.path.join(os.getcwd(), '..'))

import numpy as np

import pandas as pd
import yaml 

from PLBenchmarks import targets, ligands, edges
from rdkit import Chem
from tqdm.notebook import tqdm
import pint
unit_registry = pint.UnitRegistry()

import benchmarkpl
path = benchmarkpl.__path__[0]



In [2]:
targets.set_data_dir(path)
# directory name where results for each target are stored
results_dir = '10_results'

In [3]:
all_edges = pd.read_csv('../03_comparison_experiment/03a_all_edges_all_ffs.csv', index_col=0)
all_edges.head()

Unnamed: 0,target,edge,ligandA,ligandB,unit,DDG_Exp.,dDDG_Exp.,DDG_OpenFF-1.0,dDDG_OpenFF-1.0,DDG_OpenFF-1.0_converged,...,error_GAFF2,abserror_GAFF2,error_cGenFF,abserror_cGenFF,error_Consensus_OpenFF_GAFF2_cGenFF,abserror_Consensus_OpenFF_GAFF2_cGenFF,error_Consensus_OpenFF_GAFF2,abserror_Consensus_OpenFF_GAFF2,error_Consensus_all,abserror_Consensus_all
jnk1_edge_17124-1_18631-1,jnk1,edge_17124-1_18631-1,17124-1,18631-1,kilocalories / mole,0.26,0.37,1.19,0.09793,1.19,...,1.071262,1.071262,0.516769,0.516769,0.646112,0.646112,0.784876,0.784876,1.222263,1.222263
jnk1_edge_17124-1_18634-1,jnk1,edge_17124-1_18634-1,17124-1,18634-1,kilocalories / mole,-0.33,0.29,0.58,0.12744,0.58,...,0.829522,0.829522,0.580956,0.580956,0.852556,0.852556,0.928604,0.928604,0.798413,0.798413
jnk1_edge_18626-1_18624-1,jnk1,edge_18626-1_18624-1,18626-1,18624-1,kilocalories / mole,0.38,0.21,0.556667,0.09534,0.556667,...,0.745717,0.745717,-0.265277,0.265277,0.309516,0.309516,0.616033,0.616033,0.446727,0.446727
jnk1_edge_18626-1_18625-1,jnk1,edge_18626-1_18625-1,18626-1,18625-1,kilocalories / mole,0.77,0.21,-0.03,0.109451,-0.03,...,-0.062543,0.062543,-0.294379,0.294379,-0.388337,0.388337,-0.155679,0.155679,0.143932,0.143932
jnk1_edge_18626-1_18627-1,jnk1,edge_18626-1_18627-1,18626-1,18627-1,kilocalories / mole,0.39,0.22,0.14,0.046623,0.14,...,0.0426,0.0426,-0.232256,0.232256,-0.020344,0.020344,0.064101,0.064101,-0.12406,0.12406


In [4]:
from openforcefield.topology import Topology, Molecule
from openforcefield.typing.engines.smirnoff import ForceField

def get_unmatched_bonds(ffA, ffB, pairs_dict):
    '''
    Returns list of bonds parameters which are unique to molA given the pairs_dict
    '''
    
    bondsA = []
    for nA, parameterA in ffA[0]['Bonds'].items():
        for nB, parameterB in ffB[0]['Bonds'].items():
            match = True
            for j in [0,1]:
                if nA[j] not in pairs_dict or pairs_dict[nA[j]] != nB[j]:
                    match = False
            match_reverse = True
            for j in [0,1]:
                if nA[j] not in pairs_dict or pairs_dict[nA[j]] != nB[-j]:
                    match_reverse = False
            if match or match_reverse:
                if parameterA.id == parameterB.id:
                    #print('match', nA, nB, parameterA.id, parameterB.id)
                    break
        else:
            #print('parameterA is unique to molA', nA, parameterA.id, nB, parameterB.id)
            bondsA.append(parameterA.id)
            
            
    # turn analysis around
    pairs_dict = {b: a for a, b in pairs_dict.items()}
    bondsB = []
    for nB, parameterB in ffB[0]['Bonds'].items():
        for nA, parameterA in ffA[0]['Bonds'].items():
            match = True
            for j in [0,1]:
                if nB[j] not in pairs_dict or pairs_dict[nB[j]] != nA[j]:
                    match = False
            match_reverse = True
            for j in [0,1]:
                if nB[j] not in pairs_dict or pairs_dict[nB[j]] != nA[-j]:
                    match_reverse = False
            if match or match_reverse:
                if parameterB.id == parameterA.id:
                    #print('match', nA, nB, parameterA.id, parameterB.id)
                    break
        else:
            #print('parameterA is unique to molA', nA, parameterA.id, nB, parameterB.id)
            bondsB.append(parameterB.id)
    return bondsA, bondsB

In [5]:
def get_unmatched_angles(ffA, ffB, pairs_dict):
    '''
    Returns list of bonds parameters which are unique to molA given the pairs_dict
    '''

    anglesA = []
    for nA, parameterA in ffA[0]['Angles'].items():
        for nB, parameterB in ffB[0]['Angles'].items():
            match = True
            for j in [0,1,2]:
                if nA[j] not in pairs_dict or pairs_dict[nA[j]] != nB[j]:
                    match = False
            match_reverse = True
            for j in [0,1,2]:
                if nA[j] not in pairs_dict or pairs_dict[nA[j]] != nB[-j]:
                    match_reverse = False
            if match or match_reverse:
                if parameterA.id == parameterB.id:
                    #print('match', nA, nB, parameterA.id, parameterB.id)
                    break
        else:
            #print('parameterA is unique to molA', nA, parameterA.id, nB, parameterB.id)
            anglesA.append(parameterA.id)
    
    # turn analysis around
    pairs_dict = {b: a for a, b in pairs_dict.items()}
    anglesB = []
    for nB, parameterB in ffB[0]['Angles'].items():
        for nA, parameterA in ffA[0]['Angles'].items():
            match = True
            for j in [0,1,2]:
                if nB[j] not in pairs_dict or pairs_dict[nB[j]] != nA[j]:
                    match = False
            match_reverse = True
            for j in [0,1,2]:
                if nB[j] not in pairs_dict or pairs_dict[nB[j]] != nA[-j]:
                    match_reverse = False
            if match or match_reverse:
                if parameterA.id == parameterB.id:
                    #print('match', nA, nB, parameterA.id, parameterB.id)
                    break
        else:
            #print('parameterA is unique to molA', nA, parameterA.id, nB, parameterB.id)
            anglesB.append(parameterB.id)
    return anglesA, anglesB

In [6]:
def get_unmatched_torsions(ffA, ffB, pairs_dict):
    '''
    Returns list of torsion parameters which are unique to molA given the pairs_dict
    '''
    torsionsA = []
    for nA, parameterA in ffA[0]['ProperTorsions'].items():
        for nB, parameterB in ffB[0]['ProperTorsions'].items():
            match = True
            for j in [1,2]:
                if nA[j] not in pairs_dict or pairs_dict[nA[j]] != nB[j]:
                    match = False
            match_reverse = True
            for j in [1,2]:
                if nA[j] not in pairs_dict or pairs_dict[nA[j]] != nB[-j]:
                    match_reverse = False
            if match or match_reverse:
                if parameterA.id == parameterB.id:
#                 print('match', nA, nB, parameterA, parameterB)
                    break
        else:
#             print('parameterA is unique to molA', nA, parameterA)
            torsionsA.append(parameterA.id)

    # turn analysis around
    pairs_dict = {b: a for a, b in pairs_dict.items()}
    torsionsB = []
    for nB, parameterB in ffB[0]['ProperTorsions'].items():
        for nA, parameterA in ffA[0]['ProperTorsions'].items():
            match = True
            for j in [1,2]:
                if nB[j] not in pairs_dict or pairs_dict[nB[j]] != nA[j]:
                    match = False
            match_reverse = True
            for j in [1,2]:
                if nB[j] not in pairs_dict or pairs_dict[nB[j]] != nA[-j]:
                    match_reverse = False
            if match or match_reverse:
                if parameterA.id == parameterB.id:
#                 print('match', nA, nB, parameterA, parameterB)
                    break
        else:
#             print('parameterA is unique to molA', nA, parameterA)
            torsionsB.append(parameterB.id)    
    return torsionsA, torsionsB

In [7]:
def get_unmatched_impropers(ffA, ffB, pairs_dict):
    '''
    Returns list of torsion parameters which are unique to molA given the pairs_dict
    '''
    impropersA = []
    for nA, parameterA in ffA[0]['ImproperTorsions'].items():
        for nB, parameterB in ffB[0]['ImproperTorsions'].items():
            match = True
            for j in [1,2]:
                if nA[j] not in pairs_dict or pairs_dict[nA[j]] != nB[j]:
                    match = False
            match_reverse = True
            for j in [1,2]:
                if nA[j] not in pairs_dict or pairs_dict[nA[j]] != nB[-j]:
                    match_reverse = False
            if match or match_reverse:
                if parameterA.id == parameterB.id:
#                 print('match', nA, nB, parameterA, parameterB)
                    break
        else:
#             print('parameterA is unique to molA', nA, parameterA)
            impropersA.append(parameterA.id)

    # turn analysis around
    pairs_dict = {b: a for a, b in pairs_dict.items()}
    impropersB = []
    for nB, parameterB in ffB[0]['ImproperTorsions'].items():
        for nA, parameterA in ffA[0]['ImproperTorsions'].items():
            match = True
            for j in [1,2]:
                if nB[j] not in pairs_dict or pairs_dict[nB[j]] != nA[j]:
                    match = False
            match_reverse = True
            for j in [1,2]:
                if nB[j] not in pairs_dict or pairs_dict[nB[j]] != nA[-j]:
                    match_reverse = False
            if match or match_reverse:
                if parameterA.id == parameterB.id:
#                 print('match', nA, nB, parameterA, parameterB)
                    break
        else:
#             print('parameterA is unique to molA', nA, parameterA)
            impropersB.append(parameterB.id)
    return impropersA, impropersB

In [8]:
def get_unmatched_vdw(ffA, ffB, pairs_dict):
    '''
    Returns list of torsion parameters which are unique to molA given the pairs_dict
    '''
    vanderwaalsA = []
    for nA, parameterA in ffA[0]['vdW'].items():
        for nB, parameterB in ffB[0]['vdW'].items():
            match = True
            if nA[0] not in pairs_dict or pairs_dict[nA[0]] != nB[0]:
                match = False
            if match:
                if parameterA.id == parameterB.id:
#                 print('match', nA, nB, parameterA, parameterB)
                    break
        else:
#             print('parameterA is unique to molA', nA[0], parameterA)
            vanderwaalsA.append(parameterA.id)
    
    # turn analysis around
    pairs_dict = {b: a for a, b in pairs_dict.items()}
    vanderwaalsB = []
    for nB, parameterB in ffB[0]['vdW'].items():
        for nA, parameterA in ffA[0]['vdW'].items():
            match = True
            if nB[0] not in pairs_dict or pairs_dict[nB[0]] != nA[0]:
                match = False
            if match:
                if parameterA.id == parameterB.id:
#                 print('match', nA, nB, parameterA, parameterB)
                    break
        else:
#             print('parameterA is unique to molA', nA[0], parameterA)
            vanderwaalsB.append(parameterB.id)
    return vanderwaalsA, vanderwaalsB

In [9]:
# change source to have the molecule sdfs and pairs.dat available
targets.set_data_dir('../../../02_benchmark_calculations/')
tgt = ''
    
for ff in ['openff-1.0.0.offxml', 'openff-2.0.0.offxml']:
    forcefield = ForceField(ff)
    for i, row in tqdm(all_edges.iterrows(), total=all_edges.shape[0]):
        if tgt != row['target']:
            tgt = row['target']
            lset = ligands.LigandSet(tgt)
        ligandA = row['ligandA']
        ligandB = row['ligandB']
        molA = lset[f'lig_{ligandA}'].get_molecule()
        try:
            molB = lset[f'lig_{ligandB}'].get_molecule()
        except: # fix because one molecule with terminal double bond cannot be read in without allow_undefined_stereo=False
            mol_supplier = Chem.SDMolSupplier(lset[f'lig_{ligandB}'].get_coordinate_file_path())
            molB = mol_supplier[0]
            molB = ligands.Molecule.from_rdkit(molB, allow_undefined_stereo=True)
        pairs = np.loadtxt(f'{targets.data_path}/{targets.get_target_dir(tgt)}/03_hybrid/edge_{ligandA}_{ligandB}/water/crd/pairs.dat')
        # decrement pairs to match rdkit counting from 0!
        pairs -= 1
        pairs_dict = {a: b for a, b in pairs}
        
        topA = Topology.from_molecules([molA])
        topB = Topology.from_molecules([molB])
        # Store the parameter assignment dict in the pandas dataframe for each molecule
        ffA = forcefield.label_molecules(topA)
        ffB = forcefield.label_molecules(topB)

        bondsA, bondsB = get_unmatched_bonds(ffA, ffB, pairs_dict)
        all_edges.loc[i, f'{ff}_bondsA'] = ','.join(sorted(bondsA))
        all_edges.loc[i, f'{ff}_bondsB'] = ','.join(sorted(bondsB))


        anglesA, anglesB = get_unmatched_angles(ffA, ffB, pairs_dict)
        all_edges.loc[i, f'{ff}_anglesA'] = ','.join(sorted(anglesA))
        all_edges.loc[i, f'{ff}_anglesB'] = ','.join(sorted(anglesB))


        torsionsA, torsionsB = get_unmatched_torsions(ffA, ffB, pairs_dict)
        all_edges.loc[i, f'{ff}_torsionsA'] = ','.join(sorted(torsionsA))
        all_edges.loc[i, f'{ff}_torsionsB'] = ','.join(sorted(torsionsB))

        impropersA, impropersB = get_unmatched_impropers(ffA, ffB, pairs_dict)
        all_edges.loc[i, f'{ff}_impropersA'] = ','.join(sorted(impropersA))
        all_edges.loc[i, f'{ff}_impropersB'] = ','.join(sorted(impropersB))

        vdwaalsA, vdwaalsB = get_unmatched_vdw(ffA, ffB, pairs_dict)
        all_edges.loc[i, f'{ff}_vdWA'] = ','.join(sorted(vdwaalsA))
        all_edges.loc[i, f'{ff}_vdWB'] = ','.join(sorted(vdwaalsB))
        
all_edges

  0%|          | 0/1128 [00:00<?, ?it/s]

  result[:] = values


  0%|          | 0/1128 [00:00<?, ?it/s]

Unnamed: 0,target,edge,ligandA,ligandB,unit,DDG_Exp.,dDDG_Exp.,DDG_OpenFF-1.0,dDDG_OpenFF-1.0,DDG_OpenFF-1.0_converged,...,openff-2.0.0.offxml_bondsA,openff-2.0.0.offxml_bondsB,openff-2.0.0.offxml_anglesA,openff-2.0.0.offxml_anglesB,openff-2.0.0.offxml_torsionsA,openff-2.0.0.offxml_torsionsB,openff-2.0.0.offxml_impropersA,openff-2.0.0.offxml_impropersB,openff-2.0.0.offxml_vdWA,openff-2.0.0.offxml_vdWB
jnk1_edge_17124-1_18631-1,jnk1,edge_17124-1_18631-1,17124-1,18631-1,kilocalories / mole,0.26,0.37000,1.190000,0.097930,1.190000,...,"b16,b19,b72,b84,b84,b84,b87,b87,b87","b85,b85,b87,b87,b87","a1,a1,a1,a10,a10,a10,a10,a2,a2,a2,a2,a2,a21,a2...","a11,a11,a11,a11,a2,a2,a21,a21,a21,a21,a21","t105,t105,t95,t95,t95",,"i1,i4","i1,i4","n11,n11,n11,n16,n18,n25,n3,n3,n3","n11,n11,n11,n7,n7"
jnk1_edge_17124-1_18634-1,jnk1,edge_17124-1_18634-1,17124-1,18634-1,kilocalories / mole,-0.33,0.29000,0.580000,0.127440,0.580000,...,"b72,b87,b87,b87","b85,b87,b87,b87","a10,a10,a21,a21,a21,a21,a21","a11,a11,a21,a21,a21,a21,a21",,,"i1,i4","i1,i4","n11,n11,n11,n25","n11,n11,n11,n7"
jnk1_edge_18626-1_18624-1,jnk1,edge_18626-1_18624-1,18626-1,18624-1,kilocalories / mole,0.38,0.21000,0.556667,0.095340,0.556667,...,"b5,b5,b5,b5,b70,b87,b87,b87","b5,b5,b5,b5,b85,b87,b87,b87","a10,a10,a10,a10,a10,a10,a21,a21,a21,a21,a21","a10,a10,a10,a10,a11,a11,a21,a21,a21,a21,a21",,,"i1,i1,i1,i1,i4","i1,i1,i1,i1,i4","n11,n11,n11,n24","n11,n11,n11,n7"
jnk1_edge_18626-1_18625-1,jnk1,edge_18626-1_18625-1,18626-1,18625-1,kilocalories / mole,0.77,0.21000,-0.030000,0.109451,-0.030000,...,"b70,b85,b87,b87,b87","b70,b85,b87,b87,b87","a10,a10,a11,a11,a21,a21,a21,a21,a21","a10,a10,a11,a11,a21,a21,a21,a21,a21",,,"i1,i1,i4","i1,i1,i4","n11,n11,n11,n24,n7","n11,n11,n11,n24,n7"
jnk1_edge_18626-1_18627-1,jnk1,edge_18626-1_18627-1,18626-1,18627-1,kilocalories / mole,0.39,0.22000,0.140000,0.046623,0.140000,...,"b5,b5,b5,b5,b70,b85,b87,b87,b87","b5,b5,b5,b5,b70,b85,b87,b87,b87","a10,a10,a10,a10,a10,a10,a11,a11,a21,a21,a21,a2...","a10,a10,a10,a10,a10,a10,a11,a11,a21,a21,a21,a2...",,,"i1,i1,i1,i1,i4","i1,i1,i1,i1,i4","n11,n11,n11,n24,n7","n11,n11,n11,n24,n7"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
tnks2_edge_8e_8a,tnks2,edge_8e_8a,8e,8a,kilocalories / mole,-0.56,0.39598,-1.786667,0.244131,-1.786667,...,"b1,b1,b2,b5,b5,b5,b5,b68,b84,b84,b84,b84,b84,b...","b5,b5,b5,b5,b7,b7,b8,b84,b84,b84,b84,b84,b84,b...","a1,a1,a1,a1,a1,a1,a1,a1,a1,a1,a1,a1,a1,a1,a1,a...","a1,a1,a1,a1,a1,a1,a1,a1,a1,a1,a1,a1,a10,a10,a1...","t1,t1,t1,t1,t1,t1,t17,t17,t17,t17,t17,t17,t2,t...","t64,t64,t64,t64,t64,t64,t64,t64,t64,t64,t64,t6...","i1,i1,i1,i1","i1,i1,i1,i1,i4","n11,n11,n16,n2,n2,n2,n2,n2,n23,n6,n6,n6,n7,n7","n11,n11,n20,n3,n3,n3,n3,n6,n6,n6,n7,n7,n7"
tnks2_edge_8e_8b,tnks2,edge_8e_8b,8e,8b,kilocalories / mole,-1.69,0.39598,-2.750000,0.477929,-2.750000,...,"b1,b1,b2,b68,b84,b87,b87","b2,b7,b7,b8,b84,b84,b84,b87,b87","a1,a1,a1,a1,a1,a1,a19,a19,a19,a21,a21","a1,a1,a1,a19,a19,a19,a2,a2,a2,a20,a20,a20,a21,a21","t1,t1,t1,t1,t1,t1,t17,t17,t17,t17,t17,t17,t2,t...","t17,t17,t17,t17,t17,t17,t64,t64,t64,t64,t64,t6...",,i4,"n11,n11,n16,n2,n2,n2,n2,n2,n23","n11,n11,n16,n2,n2,n2,n20,n3,n3,n3,n3"
tnks2_edge_8e_8c,tnks2,edge_8e_8c,8e,8c,kilocalories / mole,0.12,0.39598,0.153333,0.176103,,...,"b7,b84,b84,b84,b84,b87,b87","b84,b87,b87,b87","a1,a1,a1,a1,a1,a1,a18,a18,a19,a19,a19,a2,a2,a2...","a1,a1,a1,a19,a19,a19,a19,a19,a21,a21","t50,t50,t50,t50,t50,t50,t50,t50,t50",,,,"n11,n11,n16,n2,n6,n6,n6","n11,n11,n11,n2"
tnks2_edge_8f_5m,tnks2,edge_8f_5m,8f,5m,kilocalories / mole,-0.89,0.39598,-2.433333,0.241366,,...,"b1,b1,b11,b4,b5,b5,b5,b5,b5,b7,b7,b7,b84,b84,b...","b11,b14,b4,b5,b5,b5,b5,b5,b84,b84,b87,b88","a1,a1,a1,a1,a1,a1,a1,a1,a1,a1,a1,a1,a1,a1,a1,a...","a1,a1,a1,a1,a1,a10,a10,a10,a10,a10,a10,a10,a10...","t1,t1,t1,t1,t1,t1,t2,t2,t3,t3,t3,t3,t3,t3,t3,t...","t47,t47,t47,t47,t75,t86,t9,t9,t9,t9,t9,t9,t93,...","i1,i1,i1,i7","i1,i1,i1,i7","n11,n11,n16,n16,n16,n2,n20,n6,n6,n6,n6,n6,n6,n6","n11,n12,n19,n2,n2"


## Save results to csv and yaml (per target) to read it in later analyses

In [10]:
all_edges.columns

Index(['target', 'edge', 'ligandA', 'ligandB', 'unit', 'DDG_Exp.', 'dDDG_Exp.',
       'DDG_OpenFF-1.0', 'dDDG_OpenFF-1.0', 'DDG_OpenFF-1.0_converged',
       'dDDG_OpenFF-1.0_converged', 'DDG_OpenFF-1.2', 'dDDG_OpenFF-1.2',
       'DDG_OpenFF-1.2_converged', 'dDDG_OpenFF-1.2_converged',
       'DDG_OpenFF-2.0', 'dDDG_OpenFF-2.0', 'DDG_OpenFF-2.0_converged',
       'dDDG_OpenFF-2.0_converged', 'DDG_OPLS3e', 'dDDG_OPLS3e', 'DDG_GAFF2',
       'dDDG_GAFF2', 'DDG_cGenFF', 'dDDG_cGenFF',
       'DDG_Consensus_OpenFF_GAFF2_cGenFF',
       'dDDG_Consensus_OpenFF_GAFF2_cGenFF', 'DDG_Consensus_OpenFF_GAFF2',
       'dDDG_Consensus_OpenFF_GAFF2', 'DDG_Consensus_all',
       'dDDG_Consensus_all', 'error_OpenFF-1.0', 'abserror_OpenFF-1.0',
       'error_OpenFF-1.0_converged', 'abserror_OpenFF-1.0_converged',
       'error_OpenFF-1.2', 'abserror_OpenFF-1.2', 'error_OpenFF-1.2_converged',
       'abserror_OpenFF-1.2_converged', 'error_OpenFF-2.0',
       'abserror_OpenFF-2.0', 'error_OpenFF-2.0_con

In [11]:
import json
with open("../01_inspect/old-id-to-new-id-map.json") as file:
    map_id = json.load(file)
map_id

{'c1': 'c1',
 'b1': 'b1',
 'b2': 'b2',
 'b3': 'b3',
 'b4': 'b4',
 'b5': 'b5',
 'b6': 'b6',
 'b7': 'b7',
 'b8': 'b8',
 'b9': 'b9',
 'b10': 'b10',
 'b11': 'b11',
 'b12': 'b12',
 'b13': 'b13',
 'b14': 'b14',
 'b14a': 'b15',
 'b15': 'b16',
 'b16': 'b17',
 'b17': 'b18',
 'b18': 'b19',
 'b19': 'b20',
 'b20': 'b21',
 'b21': 'b22',
 'b22': 'b23',
 'b23': 'b24',
 'b24': 'b25',
 'b25': 'b26',
 'b26': 'b27',
 'b27': 'b28',
 'b28': 'b29',
 'b29': 'b30',
 'b30': 'b31',
 'b31a': 'b32',
 'b31': 'b33',
 'b32': 'b34',
 'b33': 'b35',
 'b34': 'b36',
 'b35': 'b37',
 'b36': 'b38',
 'b36a': 'b39',
 'b37': 'b40',
 'b38': 'b41',
 'b39': 'b42',
 'b40': 'b43',
 'b41': 'b44',
 'b42': 'b45',
 'b43': 'b46',
 'b44': 'b47',
 'b45': 'b48',
 'b46': 'b49',
 'b47': 'b50',
 'b48': 'b51',
 'b49': 'b52',
 'b50': 'b53',
 'b51': 'b54',
 'b52': 'b55',
 'b53': 'b56',
 'b54': 'b57',
 'b55': 'b58',
 'b56': 'b59',
 'b58': 'b60',
 'b59': 'b61',
 'b60': 'b62',
 'b61': 'b63',
 'b62': 'b64',
 'b63': 'b65',
 'b65': 'b66',
 'b66': 'b67

In [12]:
all_edges.head()

Unnamed: 0,target,edge,ligandA,ligandB,unit,DDG_Exp.,dDDG_Exp.,DDG_OpenFF-1.0,dDDG_OpenFF-1.0,DDG_OpenFF-1.0_converged,...,openff-2.0.0.offxml_bondsA,openff-2.0.0.offxml_bondsB,openff-2.0.0.offxml_anglesA,openff-2.0.0.offxml_anglesB,openff-2.0.0.offxml_torsionsA,openff-2.0.0.offxml_torsionsB,openff-2.0.0.offxml_impropersA,openff-2.0.0.offxml_impropersB,openff-2.0.0.offxml_vdWA,openff-2.0.0.offxml_vdWB
jnk1_edge_17124-1_18631-1,jnk1,edge_17124-1_18631-1,17124-1,18631-1,kilocalories / mole,0.26,0.37,1.19,0.09793,1.19,...,"b16,b19,b72,b84,b84,b84,b87,b87,b87","b85,b85,b87,b87,b87","a1,a1,a1,a10,a10,a10,a10,a2,a2,a2,a2,a2,a21,a2...","a11,a11,a11,a11,a2,a2,a21,a21,a21,a21,a21","t105,t105,t95,t95,t95",,"i1,i4","i1,i4","n11,n11,n11,n16,n18,n25,n3,n3,n3","n11,n11,n11,n7,n7"
jnk1_edge_17124-1_18634-1,jnk1,edge_17124-1_18634-1,17124-1,18634-1,kilocalories / mole,-0.33,0.29,0.58,0.12744,0.58,...,"b72,b87,b87,b87","b85,b87,b87,b87","a10,a10,a21,a21,a21,a21,a21","a11,a11,a21,a21,a21,a21,a21",,,"i1,i4","i1,i4","n11,n11,n11,n25","n11,n11,n11,n7"
jnk1_edge_18626-1_18624-1,jnk1,edge_18626-1_18624-1,18626-1,18624-1,kilocalories / mole,0.38,0.21,0.556667,0.09534,0.556667,...,"b5,b5,b5,b5,b70,b87,b87,b87","b5,b5,b5,b5,b85,b87,b87,b87","a10,a10,a10,a10,a10,a10,a21,a21,a21,a21,a21","a10,a10,a10,a10,a11,a11,a21,a21,a21,a21,a21",,,"i1,i1,i1,i1,i4","i1,i1,i1,i1,i4","n11,n11,n11,n24","n11,n11,n11,n7"
jnk1_edge_18626-1_18625-1,jnk1,edge_18626-1_18625-1,18626-1,18625-1,kilocalories / mole,0.77,0.21,-0.03,0.109451,-0.03,...,"b70,b85,b87,b87,b87","b70,b85,b87,b87,b87","a10,a10,a11,a11,a21,a21,a21,a21,a21","a10,a10,a11,a11,a21,a21,a21,a21,a21",,,"i1,i1,i4","i1,i1,i4","n11,n11,n11,n24,n7","n11,n11,n11,n24,n7"
jnk1_edge_18626-1_18627-1,jnk1,edge_18626-1_18627-1,18626-1,18627-1,kilocalories / mole,0.39,0.22,0.14,0.046623,0.14,...,"b5,b5,b5,b5,b70,b85,b87,b87,b87","b5,b5,b5,b5,b70,b85,b87,b87,b87","a10,a10,a10,a10,a10,a10,a11,a11,a21,a21,a21,a2...","a10,a10,a10,a10,a10,a10,a11,a11,a21,a21,a21,a2...",,,"i1,i1,i1,i1,i4","i1,i1,i1,i1,i4","n11,n11,n11,n24,n7","n11,n11,n11,n24,n7"


In [13]:
for i, row in all_edges.iterrows():
    for forcefield in ['openff-1.0.0.offxml', 'openff-2.0.0.offxml']:
        parameters = []
        for t in ['bonds', 'angles', 'torsions', 'impropers', 'vdW']:
            for s in ['A', 'B']:
                if (not pd.isna(row[f"{forcefield}_{t}{s}"])) and (row[f"{forcefield}_{t}{s}"] != ''):
                    parameters.append(row[f"{forcefield}_{t}{s}"])
        for p in set(",".join(parameters).split(",")):
            new_p = p
            if forcefield == 'openff-1.0.0.offxml':
                new_p = map_id[p]
            if f'{forcefield}_{new_p}' not in list(all_edges.columns):
                all_edges[f'{forcefield}_{new_p}'] = False
            all_edges.loc[i, f'{forcefield}_{new_p}'] = True
all_edges.head()

Unnamed: 0,target,edge,ligandA,ligandB,unit,DDG_Exp.,dDDG_Exp.,DDG_OpenFF-1.0,dDDG_OpenFF-1.0,DDG_OpenFF-1.0_converged,...,openff-1.0.0.offxml_b36,openff-1.0.0.offxml_t137,openff-2.0.0.offxml_b36,openff-2.0.0.offxml_t90,openff-2.0.0.offxml_t139,openff-2.0.0.offxml_t137,openff-1.0.0.offxml_t156,openff-1.0.0.offxml_t155,openff-2.0.0.offxml_t156,openff-2.0.0.offxml_t155
jnk1_edge_17124-1_18631-1,jnk1,edge_17124-1_18631-1,17124-1,18631-1,kilocalories / mole,0.26,0.37,1.19,0.09793,1.19,...,False,False,False,False,False,False,False,False,False,False
jnk1_edge_17124-1_18634-1,jnk1,edge_17124-1_18634-1,17124-1,18634-1,kilocalories / mole,-0.33,0.29,0.58,0.12744,0.58,...,False,False,False,False,False,False,False,False,False,False
jnk1_edge_18626-1_18624-1,jnk1,edge_18626-1_18624-1,18626-1,18624-1,kilocalories / mole,0.38,0.21,0.556667,0.09534,0.556667,...,False,False,False,False,False,False,False,False,False,False
jnk1_edge_18626-1_18625-1,jnk1,edge_18626-1_18625-1,18626-1,18625-1,kilocalories / mole,0.77,0.21,-0.03,0.109451,-0.03,...,False,False,False,False,False,False,False,False,False,False
jnk1_edge_18626-1_18627-1,jnk1,edge_18626-1_18627-1,18626-1,18627-1,kilocalories / mole,0.39,0.22,0.14,0.046623,0.14,...,False,False,False,False,False,False,False,False,False,False


In [14]:
for forcefield in ['openff-1.0.0.offxml', 'openff-2.0.0.offxml']:
    for t in ['bonds', 'angles', 'torsions', 'impropers', 'vdW']:
        for s in ['A', 'B']:
            all_edges.drop(f"{forcefield}_{t}{s}", axis=1, inplace=True)
all_edges.shape

(1128, 454)

In [15]:
all_edges = all_edges.reindex(sorted(all_edges.columns), axis=1)

In [16]:
all_edges.to_csv('01f_all_edges_all_ffs_parameters.csv')

In [17]:
all_edges.columns

Index(['DDG_Consensus_OpenFF_GAFF2', 'DDG_Consensus_OpenFF_GAFF2_cGenFF',
       'DDG_Consensus_all', 'DDG_Exp.', 'DDG_GAFF2', 'DDG_OPLS3e',
       'DDG_OpenFF-1.0', 'DDG_OpenFF-1.0_converged', 'DDG_OpenFF-1.2',
       'DDG_OpenFF-1.2_converged',
       ...
       'openff-2.0.0.offxml_t86', 'openff-2.0.0.offxml_t9',
       'openff-2.0.0.offxml_t90', 'openff-2.0.0.offxml_t93',
       'openff-2.0.0.offxml_t94', 'openff-2.0.0.offxml_t95',
       'openff-2.0.0.offxml_t96', 'openff-2.0.0.offxml_t97', 'target', 'unit'],
      dtype='object', length=454)