In [1]:
import pandas as pd
from utility import get_all_stats

# Calculation-type tags; each one appears in the
# `simulation_<calc type>_<force field>` column names of the input table.
CALC_TYPES = [
    "mbar",
    "dg-c2-pb", "dg-c2-gb",
    "dg-ie-pb", "dg-ie-gb",
    "dh-pb", "dh-gb",
]

# Custom orderings: systems are processed in SYSTEM_ORDER; FORCE_FIELD_ORDER
# lists the force-field suffixes used in the simulation column names.
SYSTEM_ORDER = [
    "p38",
    "A2A",
    "ptp1b",
    "tyk2",
    "thrombin",
    "mcl1",
    "CyclophilinD",
    "SAMPL6-OA",
]
FORCE_FIELD_ORDER = [
    "espaloma-0.3.1",
    "gaff-2.11",
    "openff-2.0.0",
]

# Raw system key (as stored in the data) -> display name written to the output.
SYSTEM_NAME = {
    "p38": "P38",
    "A2A": "A2A",
    "ptp1b": "PTP1B",
    "tyk2": "TYK2",
    "thrombin": "Thrombin",
    "mcl1": "MCL1",
    "CyclophilinD": "CyclophilinD",
    "SAMPL6-OA": "SAMPL6-OA",
}


# Load the raw results table; the first CSV column is used as the index.
BindFlowData = pd.read_csv("BindFlow.csv", index_col=0)

# Identifier and experimental-reference columns kept for every row.
columns = [
    "system",
    "ligand",
    "replica",
    "sample",
    "exp_dG",
    "exp_dG_error",
]
# One simulation column per (calculation type, force field) pair. The force
# fields come from FORCE_FIELD_ORDER so the selection cannot drift from the
# ordering constant defined above.
for CALC_TYPE in CALC_TYPES:
    columns += [
        f"simulation_{CALC_TYPE}_{force_field}"
        for force_field in FORCE_FIELD_ORDER
    ]

# Keep only the selected columns (in this order) and expose "system" as
# "source". Chaining returns a new frame instead of mutating a slice in place.
BindFlowData = BindFlowData[columns].rename(columns={"system": "source"})

Failed to find the pandas get_adjustment() function to patch
Failed to patch pandas - PandasTools will have limited functionality


In [2]:
# Aggregate over all rows of each (source, ligand) pair: per-column mean and
# standard error of the mean. The groupby is built once and reused.
grouped = BindFlowData.groupby(["source", "ligand"])
mean = grouped.mean().reset_index().drop(columns=["replica", "sample"])
sem = grouped.sem().reset_index().drop(columns=["replica", "sample"])

# Keep only ligands whose mean "simulation_mbar_espaloma-0.3.1" value is present.
# `mean` and `sem` come from the same groupby, so one mask aligns with both.
mask = mean["simulation_mbar_espaloma-0.3.1"].notna()
mean = mean[mask]

# Relabel the SEM columns ("simulation_*" -> "sem_*") and discard the SEM of
# the experimental columns. Chained calls return new frames, so no filtered
# slice is mutated in place.
sem = (
    sem[mask]
    .rename(columns=lambda column: column.replace("simulation_", "sem_"))
    .drop(columns=["exp_dG", "exp_dG_error"])
)

# One row per (source, ligand): mean columns followed by their SEM columns.
df_merge = pd.merge(mean, sem, on=["source", "ligand"])
df_merge

Unnamed: 0,source,ligand,exp_dG,exp_dG_error,simulation_mbar_espaloma-0.3.1,simulation_mbar_gaff-2.11,simulation_mbar_openff-2.0.0,simulation_dg-c2-pb_espaloma-0.3.1,simulation_dg-c2-pb_gaff-2.11,simulation_dg-c2-pb_openff-2.0.0,...,sem_dg-ie-pb_openff-2.0.0,sem_dg-ie-gb_espaloma-0.3.1,sem_dg-ie-gb_gaff-2.11,sem_dg-ie-gb_openff-2.0.0,sem_dh-pb_espaloma-0.3.1,sem_dh-pb_gaff-2.11,sem_dh-pb_openff-2.0.0,sem_dh-gb_espaloma-0.3.1,sem_dh-gb_gaff-2.11,sem_dh-gb_openff-2.0.0
0,A2A,4g,-11.13,0.00,-11.663549,-12.948251,-14.902108,-19.011367,-14.953111,-19.981053,...,0.277236,0.287613,0.289864,0.231922,0.268723,0.201350,0.188729,0.126405,0.203368,0.124536
1,A2A,4h,-10.72,0.00,-14.985823,-12.715193,-14.984288,-16.281790,-15.578446,-17.598608,...,0.324237,0.273521,0.221427,0.287595,0.258195,0.202226,0.197033,0.103359,0.130013,0.176150
2,A2A,4i,-10.38,0.00,-15.290971,-12.655867,-13.378213,-17.154691,-15.227383,-17.776400,...,0.447098,0.240556,0.358086,0.367270,0.315874,0.330958,0.257554,0.130565,0.253722,0.193323
3,A2A,4j,-10.95,0.00,-14.987652,-14.671048,-14.477255,-18.049032,-18.007832,-19.252044,...,0.348742,0.227143,0.256751,0.319838,0.178715,0.202845,0.222286,0.138198,0.129118,0.165912
4,A2A,4k,-11.61,0.00,-14.548687,-16.967877,-16.149837,-16.077743,-17.055115,-18.582039,...,0.331798,0.282044,0.200845,0.241440,0.305825,0.166427,0.216478,0.178724,0.130984,0.124937
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
134,tyk2,lig_ejm_54,-10.63,0.18,-12.628961,-12.536255,-13.602891,-18.537698,-18.411806,-18.825066,...,0.289155,0.332322,0.341502,0.233166,0.173245,0.301666,0.207951,0.191287,0.214488,0.152792
135,tyk2,lig_ejm_55,-9.29,0.18,-15.239058,-9.660865,-12.039963,-18.139189,-15.789316,-18.612232,...,0.305236,0.267680,0.257594,0.297308,0.193402,0.190472,0.164450,0.184482,0.186038,0.134204
136,tyk2,lig_jmc_23,-11.81,0.18,-15.518173,-10.002343,-12.745145,-18.056509,-16.058322,-19.471638,...,0.228162,0.332376,0.366979,0.249415,0.254711,0.267633,0.124746,0.234803,0.259219,0.133197
137,tyk2,lig_jmc_27,-11.38,0.18,-15.064844,-11.333263,-12.784466,-22.197525,-20.279139,-21.950402,...,0.259832,0.274942,0.277554,0.250746,0.188065,0.201192,0.145557,0.241804,0.197458,0.161739


In [3]:

# Settings forwarded unchanged to get_all_stats.
# NOTE(review): presumably the number of bootstrap replicates and a confidence
# level in percent - confirm against utility.get_all_stats.
# NOTE(review): if get_all_stats resamples randomly, no seed is set in this
# notebook, so results may differ between runs - confirm.
replicates = 10_000
confidence = 68

# Collect one statistics frame per system and concatenate once at the end,
# instead of re-copying a growing DataFrame on every iteration.
per_system_stats = []
for system in SYSTEM_ORDER:
    system_name = SYSTEM_NAME[system]
    df = get_all_stats(df_merge[df_merge["source"] == system], replicates=replicates, confidence=confidence)
    # Label the rows with the display name rather than the raw system key.
    df["source"] = system_name
    per_system_stats.append(df)

# pd.concat raises on an empty list; fall back to an empty frame in that case.
all_stats = pd.concat(per_system_stats) if per_system_stats else pd.DataFrame()

all_stats.to_csv("BindFlow-stats.csv")

Failed to find the pandas get_adjustment() function to patch
Failed to patch pandas - PandasTools will have limited functionality
Failed to find the pandas get_adjustment() function to patch
Failed to patch pandas - PandasTools will have limited functionality
Failed to find the pandas get_adjustment() function to patch
Failed to patch pandas - PandasTools will have limited functionality
Failed to find the pandas get_adjustment() function to patch
Failed to patch pandas - PandasTools will have limited functionality
Failed to find the pandas get_adjustment() function to patch
Failed to patch pandas - PandasTools will have limited functionality
Failed to find the pandas get_adjustment() function to patch
Failed to patch pandas - PandasTools will have limited functionality
Failed to find the pandas get_adjustment() function to patch
Failed to patch pandas - PandasTools will have limited functionality
Failed to find the pandas get_adjustment() function to patch
Failed to patch pandas - Pand