In [None]:
# Import libraries and make the local cinnabar / pipeline checkouts importable.
import BioSimSpace as BSS
import sys
import os
from sklearn.metrics import mean_absolute_error as MAE

# Put the local cinnabar checkout on sys.path (position 1) before importing it,
# guarded so that re-running the cell does not add the path twice.
if "/home/anna/Documents/cinnabar" not in sys.path:
    sys.path.insert(1, "/home/anna/Documents/cinnabar")
import cinnabar

# Same for the directory that contains the `pipeline` package.
print("adding code to the pythonpath...")
code = "/home/anna/Documents/code/python"
if code not in sys.path:
    sys.path.insert(1, code)
import pipeline

# Show which cinnabar installation was actually picked up.
print(cinnabar.__file__)

# NOTE(review): later cells use `pd`, `plt`, `analysis_network`, `net_graph` and
# `plotting_engines` without importing them by name; presumably they come from
# these star imports - TODO confirm and import them explicitly.
from pipeline import *
from pipeline.utils import validate
from pipeline.analysis import *

In [None]:
# Build the analysis object for a single protein / network (tyk2, lomap).
protein = "tyk2"
bench_folder = "/home/anna/Documents/benchmark"
main_dir = f"{bench_folder}/extracted/{protein}"

# Input files describing the network, the analysis protocol and the experiment.
execution_model_dir = f"{main_dir}/execution_model"
net_file = f"{execution_model_dir}/network_lomap.dat"
ana_file = f"{execution_model_dir}/analysis_protocol.dat"
exp_file = f"{bench_folder}/inputs/experimental/{protein}.yml"

# Use the first results directory that exists, preferring the extracted outputs.
results_candidates = (
    f"{main_dir}/outputs_extracted/results",
    f"{main_dir}/outputs/results",
)
found_results = next(
    (path for path in results_candidates if os.path.exists(path)), None
)
if found_results is None:
    raise ValueError(
        f"results directory not found in the {main_dir}. please make sure results were written using the analysis script previously in the pipeline"
    )
results_folder = found_results

output_folder = validate.folder_path(f"{main_dir}/analysis", create=True)

network_inputs = dict(
    exp_file=exp_file,
    net_file=net_file,
    output_folder=output_folder,
    analysis_ext=ana_file,
)
all_analysis_object = analysis_network(results_folder, **network_inputs)

# Further results files could be added here before computing, e.g.
# all_analysis_object.compute_other_results(file_name=None, name=None)
all_analysis_object.compute(cycle_closure=True)

In [None]:
# Display the `normalised_exper_val_dict` attribute of the analysis object built above
# (by its name, the normalised experimental values - confirm in pipeline.analysis).
all_analysis_object.normalised_exper_val_dict

In [None]:
# Build one table (`df3`) holding the fwf, cinnabar and experimental values side by side.
fwf_path = (
    "/home/anna/Documents/september_2022_workshops/freenrgworkflows/networkanalysis"
)
all_analysis_object._add_fwf_path(fwf_path)

eng = "SOMD"

# Experimental values as returned by the fwf helper; only the first element is used.
exp_dicts = all_analysis_object._get_exp_fwf()
experimental_dict = exp_dicts[0]
for name in experimental_dict:
    entry = experimental_dict[name]
    print(f"{name} : {entry[0]}, {entry[1]}")

# Network analysis results from fwf for the chosen engine.
fwf_dict = all_analysis_object._get_ana_fwf(engine=eng)
for name in fwf_dict:
    entry = fwf_dict[name]
    print(f"{name} : {entry[0]}, {entry[1]}")

cinnabar_dict = all_analysis_object.cinnabar_calc_val_dict[eng]

cinnabar_vs_fwf = plotting_engines.match_dicts_to_df(
    cinnabar_dict, fwf_dict, "cinnabar", "fwf"
)
experimental_vs_cinnabar = plotting_engines.match_dicts_to_df(
    experimental_dict, cinnabar_dict, "experimental", "cinnabar"
)
# The cinnabar columns are already present in the first frame, so drop them before joining.
experimental_only = experimental_vs_cinnabar.drop(
    columns=["freenrg_cinnabar", "err_cinnabar"]
)
df3 = cinnabar_vs_fwf.join(experimental_only)
df3

In [None]:
# Compare the cinnabar and fwf estimates with experiment for engine `eng`.
# `df3` itself is left untouched so this cell can be re-run safely.

# Bar chart of all three values per ligand. `fillna` returns a new frame, so the
# result has to be kept; missing values are then drawn as zero-height bars.
df3_filled = df3.fillna(0)
df3_filled.plot.bar(
    y=["freenrg_fwf", "freenrg_cinnabar", "freenrg_experimental"],
    yerr=df3_filled[["err_fwf", "err_cinnabar", "err_experimental"]].T.values,
    title=f"fwf, cinnabar, experimental, {eng}",
    xlabel="ligands",
    ylabel="dG (kcal/mol)",
)

# The scatter plots and statistics need complete rows. `dropna` also returns a new
# frame; without keeping it, NaN rows would reach the metric functions below.
df3_complete = df3.dropna()

print("cinnabar")

df3_complete.plot.scatter(
    x="freenrg_experimental",
    y="freenrg_cinnabar",
    yerr="err_cinnabar",
    title=f"cinnabar, {eng}",
    xlabel="experimental dG (kcal/mol)",
    ylabel="cinnabar dG (kcal/mol)",
)

# cinnabar vs experiment: MAE from sklearn, then MUE from the pipeline stats object
# (once computed internally, once computed from the table built in this notebook).
print("mae")
c_mae = MAE(df3_complete["freenrg_experimental"], df3_complete["freenrg_cinnabar"])
print(c_mae)
print("mue")
print(all_analysis_object._stats_object.compute_mue(pert_val="val", y=eng))
c_mae_2 = all_analysis_object._stats_object._compute_stats(
    x=df3_complete["freenrg_experimental"],
    y=df3_complete["freenrg_cinnabar"],
    yerr=df3_complete["err_cinnabar"],
    statistic="MUE",
)
print(c_mae_2)
print("ktau")
print(all_analysis_object._stats_object.compute_ktau(pert_val="val", y=eng))

print("fwf")

df3_complete.plot.scatter(
    x="freenrg_experimental",
    y="freenrg_fwf",
    yerr="err_fwf",
    title=f"fwf, {eng}",
    xlabel="experimental dG (kcal/mol)",
    ylabel="fwf dG (kcal/mol)",
)

# fwf vs experiment. `fwf_stats` is not used further in this cell; the call is kept
# as in the original notebook.
fwf_stats = all_analysis_object._get_stats_fwf(engine=eng)

print("mae")
f_mae = MAE(df3_complete["freenrg_experimental"], df3_complete["freenrg_fwf"])
print(f_mae)

# Same statistic computed with the cinnabar-based stats helper.
f_mae_2 = all_analysis_object._stats_object._compute_stats(
    x=df3_complete["freenrg_experimental"],
    y=df3_complete["freenrg_fwf"],
    yerr=df3_complete["err_fwf"],
    statistic="MUE",
)
print(f_mae_2)

print("between both net ana methods")

# Title names both methods; it previously repeated the fwf-only title.
df3_complete.plot.scatter(
    x="freenrg_cinnabar",
    y="freenrg_fwf",
    xerr="err_cinnabar",
    yerr="err_fwf",
    title=f"cinnabar vs fwf, {eng}",
    xlabel="cinnabar dG (kcal/mol)",
    ylabel="fwf dG (kcal/mol)",
)

b_mae = all_analysis_object._stats_object._compute_stats(
    x=df3_complete["freenrg_cinnabar"],
    y=df3_complete["freenrg_fwf"],
    xerr=df3_complete["err_cinnabar"],
    yerr=df3_complete["err_fwf"],
    statistic="MUE",
)
print(b_mae)

# comparing the different networks and systems in terms of failed and successful runs


In [None]:
# Import libraries for the network / system comparison part of the notebook.
# NOTE(review): this largely repeats the first import cell of the notebook.

from scipy.stats import sem as sem
import sys
import os
import glob

# Put the local BioSimSpace checkout on sys.path before importing it; the guard
# keeps the path from being added twice when the cell is re-run.
if "/home/anna/BioSimSpace/python" not in sys.path:
    sys.path.insert(1, "/home/anna/BioSimSpace/python")
import BioSimSpace as BSS

# Same for the local cinnabar checkout.
if "/home/anna/Documents/cinnabar" not in sys.path:
    sys.path.insert(1, "/home/anna/Documents/cinnabar")
import cinnabar

# Same for the directory that contains the `pipeline` package.
print("adding code to the pythonpath...")
code = "/home/anna/Documents/code/python"
if code not in sys.path:
    sys.path.insert(1, code)
import pipeline

# Show which cinnabar installation was actually picked up.
print(cinnabar.__file__)

# NOTE(review): the cells below use `pd`, `plt`, `analysis_network` and `net_graph`
# without importing them by name; presumably they come from these star imports -
# TODO confirm and import them explicitly.
from pipeline import *
from pipeline.utils import validate
from pipeline.analysis import *

In [None]:
# Analyse every protein / network combination and record, per engine, the outcome of
# `successful_runs` and how many ligands end up disconnected from the network.
nets = ["lomap", "rbfenn"]
prots = ["tyk2", "mcl1", "p38"]

bench_folder = "/home/anna/Documents/benchmark"

# Analysis objects, indexed as [protein][network].
ana_obj_dict = {prot: {} for prot in prots}

# Result containers, all indexed as [protein][network][engine].
results_dict = {prot: {net: {} for net in nets} for prot in prots}
val_results_dict = {prot: {net: {} for net in nets} for prot in prots}
pert_results_dict = {prot: {net: {} for net in nets} for prot in prots}
disconnect_results_dict = {prot: {net: {} for net in nets} for prot in prots}
val_disconnect_results_dict = {prot: {net: {} for net in nets} for prot in prots}

for prot in prots:
    protein = prot
    main_dir = f"{bench_folder}/extracted/{protein}"
    # These locations depend on the protein only, not on the network.
    ana_file = f"{main_dir}/execution_model/analysis_protocol.dat"
    exp_file = f"{bench_folder}/inputs/experimental/{protein}.yml"
    results_folder = f"{main_dir}/outputs_extracted/results"

    for net in nets:
        net_file = f"{main_dir}/execution_model/network_{net}.dat"
        output_folder = validate.folder_path(f"{main_dir}/analysis", create=True)

        all_analysis_object = analysis_network(
            results_folder,
            exp_file=exp_file,
            net_file=net_file,
            output_folder=output_folder,
            analysis_ext=ana_file,
        )

        # Further results files could be added here before computing, e.g.
        # all_analysis_object.compute_other_results(file_name=None, name=None)
        all_analysis_object.compute(cycle_closure=False)
        ana_obj_dict[prot][net] = all_analysis_object

        n_ligands = len(all_analysis_object.ligands)
        print(f"no of ligands for {prot} is {n_ligands}")

        for eng in all_analysis_object.engines:
            # presumably (count, percentage, list of perturbations) - confirm
            # against pipeline.analysis.
            val, percen, perturbations = all_analysis_object.successful_runs(eng)

            results_dict[prot][net][eng] = percen
            val_results_dict[prot][net][eng] = val
            pert_results_dict[prot][net][eng] = perturbations

            # Graph built from all ligands and the perturbations returned above;
            # query the disconnected ligands once and reuse the result.
            gra = net_graph(all_analysis_object.ligands, perturbations)
            disconnected = gra.disconnected_ligands()
            print(f"{prot}, {net}, {eng}")
            print(disconnected)

            no_ligs = len(disconnected)
            disconnect_percen = (no_ligs / n_ligands) * 100
            disconnect_results_dict[prot][net][eng] = disconnect_percen
            val_disconnect_results_dict[prot][net][eng] = no_ligs

In [None]:
# Bar charts summarising the values collected in results_dict /
# disconnect_results_dict, which are indexed as [protein][network][engine].

# One chart per protein.
for prot in prots:
    df = pd.DataFrame.from_dict(results_dict[prot])
    df.plot.bar(color=["teal", "hotpink"])
    plt.title(f"{prot}")
    plt.xlabel("engine")
    plt.ylabel("percentage")

# Engines are taken from the most recently built analysis object.
engines = list(all_analysis_object.engines)

# Plot for all proteins per engine: mean percentage over the proteins.
# The mean divides by the number of proteins being summed (previously the number
# of engines, which only gave the right answer when the two counts were equal).
combined_dict = {
    net: {
        eng: sum(results_dict[prot][net][eng] for prot in prots) / len(prots)
        for eng in engines
    }
    for net in nets
}
# Totals of the raw counts over the proteins.
val_combined_dict = {
    net: {
        eng: sum(val_results_dict[prot][net][eng] for prot in prots)
        for eng in engines
    }
    for net in nets
}

df = pd.DataFrame.from_dict(combined_dict)
df.plot.bar(color=["teal", "hotpink"])
plt.title("all proteins")
plt.xlabel("engine")
plt.ylabel("percentage")


# Plot for all engines across all proteins: one mean per network.
combined_dict = {
    net: {
        "": sum(results_dict[prot][net][eng] for eng in engines for prot in prots)
        / (len(engines) * len(prots))
    }
    for net in nets
}
val_combined_dict = {
    net: {
        "": sum(
            val_results_dict[prot][net][eng] for eng in engines for prot in prots
        )
    }
    for net in nets
}

df = pd.DataFrame.from_dict(combined_dict)
df.plot.bar(color=["teal", "hotpink"])
plt.title("all proteins and engines")
plt.xlabel("network")
plt.ylabel("percentage")

# Plot for all proteins per engine for disconnected ligands (mean over proteins).
combined_dict = {
    net: {
        eng: sum(disconnect_results_dict[prot][net][eng] for prot in prots)
        / len(prots)
        for eng in engines
    }
    for net in nets
}
val_combined_dict = {
    net: {
        eng: sum(val_disconnect_results_dict[prot][net][eng] for prot in prots)
        for eng in engines
    }
    for net in nets
}

df = pd.DataFrame.from_dict(combined_dict)
df.plot.bar(color=["teal", "hotpink"])
plt.title("disconnected ligands for all proteins")
plt.xlabel("engine")
plt.ylabel("percentage")

In [None]:
# Print the MAE (pert_val="val") for every protein / network analysis object
# stored in ana_obj_dict.
for prot, net in [(p, n) for p in prots for n in nets]:
    print(f"{prot}, {net}")
    ana_obj = ana_obj_dict[prot][net]

    # Optional extras that are currently switched off:
    #   ana_obj.plot_scatter_lig()
    #   ana_obj.compute_mue(pert_val="val", engines=eng) for each eng in ana_obj.engines
    print(ana_obj.calc_mae(pert_val="val"))

In [None]:
# Network-specific runs and their failures, using the combined network of each
# protein. Perturbations are assigned to "lomap", "rbfenn" or "shared" according
# to the last comma-separated field of unique_perts.dat.

nets = ["lomap", "rbfenn", "shared"]
prots = ["tyk2", "mcl1", "p38"]

bench_folder = "/home/anna/Documents/benchmark"

# Both containers are indexed as [protein][network][engine].
results_dict = {prot: {net: {} for net in nets} for prot in prots}
val_results_dict = {prot: {net: {} for net in nets} for prot in prots}

for prot in prots:
    protein = prot
    main_dir = f"{bench_folder}/extracted/{protein}"
    # choose location for the files
    net_file = f"{main_dir}/execution_model/network_combined.dat"
    ana_file = f"{main_dir}/execution_model/analysis_protocol.dat"
    exp_file = f"{bench_folder}/inputs/experimental/{protein}.yml"
    results_folder = f"{main_dir}/outputs_extracted/results"
    output_folder = validate.folder_path(f"{main_dir}/analysis", create=True)

    all_analysis_object = analysis_network(
        results_folder,
        exp_file=exp_file,
        net_file=net_file,
        output_folder=output_folder,
        analysis_ext=ana_file,
    )

    # Further results files could be added here before computing, e.g.
    # all_analysis_object.compute_other_results(file_name=None, name=None)
    all_analysis_object.compute(cycle_closure=False)

    print(f"{prot}")
    print("pert")
    print(all_analysis_object.calc_mae(pert_val="pert"))
    print("val")
    print(all_analysis_object.calc_mae(pert_val="val"))

    # Read the perturbation list once per protein and group it by network label:
    # first field is the perturbation, last field is the label.
    perts_by_net = {net: [] for net in nets}
    with open(f"{main_dir}/execution_model/unique_perts.dat") as file:
        for line in file:
            fields = line.split(",")
            label = fields[-1].strip()
            if label in perts_by_net:
                perts_by_net[label].append(fields[0].strip())

    for net in nets:
        perts = perts_by_net[net]
        for eng in all_analysis_object.engines:
            # presumably (count, percentage, list of perturbations) - confirm
            # against pipeline.analysis.
            val, percen, perturbations = all_analysis_object.successful_runs(eng, perts)

            results_dict[prot][net][eng] = percen
            val_results_dict[prot][net][eng] = val

print(val_results_dict)
# A bare expression in the middle of a cell is not displayed, so print it.
print(results_dict)

# Plot for all proteins per engine: mean percentage over the proteins.
# The mean divides by the number of proteins being summed (previously the number
# of engines, which only gave the right answer when the two counts were equal).
engines = list(all_analysis_object.engines)
combined_dict = {
    net: {
        eng: sum(results_dict[prot][net][eng] for prot in prots) / len(prots)
        for eng in engines
    }
    for net in nets
}
# Totals of the raw counts over the proteins.
val_combined_dict = {
    net: {
        eng: sum(val_results_dict[prot][net][eng] for prot in prots)
        for eng in engines
    }
    for net in nets
}

df = pd.DataFrame.from_dict(combined_dict)
df.plot.bar(color=["teal", "hotpink", "darkblue"])
plt.title("all proteins")
plt.xlabel("engine")
plt.ylabel("percentage")

In [None]:
# Failed runs (100 minus the value stored in results_dict) per protein and engine
# for the lomap network, using display names for proteins and engines.
net = "lomap"
prot_dict = {"tyk2": "TYK2", "p38": "P38α", "mcl1": "MCL1"}
eng_dict = {"GROMACS": "GMX", "AMBER": "PMEMD", "SOMD": "SOMD"}

# Outer keys become the bar groups' series (engines), inner keys the x axis (proteins).
combined_dict = {
    eng_dict[eng]: {
        prot_dict[prot]: 100 - results_dict[prot][net][eng] for prot in prots
    }
    for eng in ["AMBER", "GROMACS", "SOMD"]
}

df = pd.DataFrame.from_dict(combined_dict)
ax = df.plot.bar(color=["orange", "orchid", "darkturquoise"])
ax.set_xlabel("protein")
ax.set_ylabel("failed runs (%)")