In [None]:
# PTRnet Ablation Study Results

In [1]:
import os
import pandas as pd

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import font_manager as fm
from scipy.interpolate import make_interp_spline

import vis_utils
from custom_colors import blue_shades

# Use Source Sans Pro for all figure text when the font file is present at the
# per-user Windows font location below. On machines where the file is missing,
# keep matplotlib's default font instead of failing in the setup cell.
font_path = r'C:\Users\Felix\AppData\Local\Microsoft\Windows\Fonts\SourceSansPro-Regular.ttf'

if os.path.isfile(font_path):
    # Register the font file, then select it by the family name matplotlib reads from it.
    fm.fontManager.addfont(font_path)
    source_sans_pro = fm.FontProperties(fname=font_path)
    plt.rcParams['font.family'] = source_sans_pro.get_name()
else:
    # Font not available here: leave rcParams untouched and expose default properties.
    print(f"Font file not found at {font_path}; using matplotlib's default font.")
    source_sans_pro = fm.FontProperties()

In [None]:
# Switch for writing figures to disk: when True, the bar plot cell saves
# ptrnet_ablation_barplot.pdf under the directory given by the OUTPUT_DIR
# environment variable. Set to False to only display the figure.
STORE = True
# STORE = False

In [2]:
# Load the exported run table.
df_raw = pd.read_csv("data/runs-12_34_50-10-Jun-25-PTRnet-final.csv")

# Flag runs whose experiment path contains "pretrain". Rows without an
# experiment string are flagged as well (na=True) so they are dropped below,
# matching the previous `== False` filter.
df_raw["pretrain"] = df_raw["experiment"].str.contains("pretrain", na=True)

# Keep only the non-pretraining runs. The explicit copy makes the result an
# independent frame, so the column assignments below do not write into a
# filtered slice (which raises SettingWithCopyWarning).
df_raw = df_raw[~df_raw["pretrain"]].copy()

# The run id is the part before the first "_" of the third "/"-separated
# segment of the experiment path.
df_raw["run_id_temp"] = df_raw["experiment"].str.split("/").str[2]
df_raw["run_id"] = df_raw["run_id_temp"].str.split("_").str[0]

In [None]:
# Display the loaded run table (pretraining runs removed, run_id parsed from the experiment path).
df_raw

In [None]:
# Display label for each ablation run, keyed by its integer run id.
run_names = {
    26: "baseline (tuned PTRnet)",
    44: "- codon frequencies",
    27: "- AUG alignment",
    28: "OHE of inputs",
    29: "concat tissue embeddings",
    30: "sequence only",
    31: "+ pretraining",
}

# The ids are cast to int so they match the dictionary keys; ids without an
# entry in run_names end up with a missing name.
run_ids_as_int = df_raw["run_id"].astype(int)
df_raw["run_id"] = run_ids_as_int
df_raw["name"] = run_ids_as_int.map(run_names)

In [None]:
# Working copy restricted to the run identifiers and the metric columns used below.
df = df_raw.loc[
    :,
    [
        "run",
        "run_id",
        "name",
        "AUC_train",
        "AUC_val",
        "F1_val",
        "Precision_val",
        "Recall_val",
        "Accuracy_val",
    ],
].copy()

In [None]:
# Relabel the five "<metric>_val" columns as "<metric>_test"; AUC_train is left as is.
val_to_test = {
    f"{metric}_val": f"{metric}_test"
    for metric in ("AUC", "F1", "Precision", "Recall", "Accuracy")
}
df = df.rename(columns=val_to_test)

In [None]:
# Cast every metric column (everything except the identifier columns) to float.
cols_to_convert = [col for col in df.columns if col not in ['run', "run_id", "name"]]

# Assign with df[cols] rather than df.loc[:, cols]: the .loc form writes the
# values into the existing columns in place (pandas >= 2.0), so a column that
# was read as object dtype would stay object and the cast would have no effect.
df[cols_to_convert] = df[cols_to_convert].astype(float)

In [None]:
# Order the runs from lowest to highest test AUC; the plot below draws rows in this order.
df = df.sort_values("AUC_test")
df

In [None]:
# Horizontal grouped bar chart: one row per run (in the current order of df),
# with the test and train AUC drawn as two adjacent bars.
fig, ax = plt.subplots(figsize=(7.5, 4))
bar_height = 0.35
y = range(len(df))

# Offset the two bars by half a bar height on either side of each tick position.
ax.barh([i + bar_height / 2 for i in y], df["AUC_test"], height=bar_height, label='Test', color=blue_shades[4])
ax.barh([i - bar_height / 2 for i in y], df["AUC_train"], height=bar_height, label='Train', color=blue_shades[0])

# Labels and legend
ax.set_yticks(y)
ax.set_yticklabels(df.name)
ax.set_xlabel("AUC")
ax.set_title("PTRnet Ablation Study Results")
ax.legend()

# Zoom the x-axis onto the observed AUC range with a small margin on both sides.
auc_low = min(df["AUC_test"].min(), df["AUC_train"].min())
auc_high = max(df["AUC_test"].max(), df["AUC_train"].max())
ax.set_xlim(auc_low * 0.95, auc_high * 1.02)
plt.tight_layout()

if STORE:
    # os.getenv returns None when OUTPUT_DIR is unset, and concatenating None
    # with a string raises TypeError; fall back to the working directory.
    output_dir = os.getenv("OUTPUT_DIR") or "."
    os.makedirs(output_dir, exist_ok=True)
    plt.savefig(os.path.join(output_dir, "ptrnet_ablation_barplot.pdf"), format="pdf", bbox_inches="tight")

plt.show()

## All metrics table

In [None]:
# See benchmark_test_results.ipynb