# HELP performance in E vs NE problem

In [9]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os

# --- Configuration -----------------------------------------------------------
# Score files are looked up as
#   {scorepath}/score_{method1}_{tissue}_{problem}_{suffix}.csv
tissue = 'Lung'
problem = 'EvsAE'
method1 = 'HELP'
method2 = 'AdAM'
method3 = 'FiPer'
method4 = 'OGEE'

scorepath = '../scores_sv'
# Legend label of each feature set, paired position-by-position with the
# suffix used in the corresponding score file name.
features = ["Bio", "N2V", "Bio+CCBeder", "Bio+CCcfs", "Bio+N2V", "Bio+CCcfs+N2V"]
suffixes = ['bio','n2v', 'bioccbeder', 'biocc', 'bion2v', 'bioccn2v']
# zip() silently truncates to the shorter list, so fail loudly on a mismatch.
assert len(features) == len(suffixes), "features and suffixes must pair up one-to-one"

# --- Upload input datasets ---------------------------------------------------
# One score table per feature set; the "MCC" and "CM" columns are not plotted.
# (Alternative selection: drop ["ROC-AUC", "Accuracy", "CM"] instead.)
dfs = []
for feature, suffix in zip(features, suffixes):
    score_file = os.path.join(scorepath, f"score_{method1}_{tissue}_{problem}_{suffix}.csv")
    df_tmp = pd.read_csv(score_file).drop(columns=["MCC", "CM"])
    df_tmp['Feature'] = feature
    dfs.append(df_tmp)

# Merge datasets and reshape to long format (Feature, Metric, Value) for seaborn.
Scores = pd.melt(pd.concat(dfs), id_vars=["Feature"], var_name="Metric", value_name="Value")

# --- Generate boxplots -------------------------------------------------------
sns.set_context("poster")

fig, ax = plt.subplots(figsize=(18, 7))
# x is numeric and y is categorical, so the boxes are drawn horizontally with
# one group of boxes (one per feature set) for every metric along the y-axis.
sns.boxplot(x="Value", y="Metric", hue="Feature", data=Scores, dodge=True,
            linewidth=1.5, fliersize=5, saturation=0.75, notch=False,
            boxprops=dict(alpha=0.7), ax=ax)

ax.set_xlim(0.5, 1)
ax.set_xlabel("Value", fontsize=18)
ax.set_ylabel("Metric", fontsize=18)
ax.tick_params(axis="both", labelsize=18)
ax.legend(title="Features", fontsize=16, title_fontsize=18, loc="upper center", bbox_to_anchor=(0.11, 1))
ax.grid(axis="y", linestyle="--", alpha=0.7)

# Add separator lines between groups of metric values. The metric categories
# sit at y = 0, 1, 2, ..., so the boundaries are the horizontal lines at
# y = i - 0.5 (vertical lines at those x positions would fall on or outside
# the 0.5-1 value range and never separate the groups).
unique_metrics = Scores["Metric"].unique()
for i in range(1, len(unique_metrics)):
    ax.axhline(y=i - 0.5, color='lightgray', linestyle='--', linewidth=1)
plt.show()


FileNotFoundError: [Errno 2] No such file or directory: '../scores_sv/score_HELP_Lung_EvsAE_bio.csv'

In [2]:
import pandas as pd
from tabulate import tabulate
import os
# --- Configuration -----------------------------------------------------------
# Score files are looked up as
#   {scorepath}/score_{method1}_{tissue}_{problem}_{suffix}.csv
scorepath = '../scores_sv'
results_dir = '../results'
method1 = 'HELP'
tissue = 'Kidney'
problem = 'EvsNE'
# Row label of each feature set, paired position-by-position with the suffix
# used in the corresponding score file name. (A "Bio+BPBeder" / 'biobpbeder'
# set was previously listed between "N2V" and "Bio+CCBeder"; it is disabled.)
features = ["Bio", "N2V","Bio+CCBeder", "Bio+CCcfs", "Bio+N2V", "Bio+CCcfs+N2V"]
suffixes = ['bio','n2v', 'bioccbeder', 'biocc', 'bion2v', 'bioccn2v']
# zip() silently truncates to the shorter list, so fail loudly on a mismatch.
assert len(features) == len(suffixes), "features and suffixes must pair up one-to-one"

# --- Summarise every score file ----------------------------------------------
# Rows are collected in lists and the frames are built once after the loop,
# instead of growing a DataFrame with pd.concat on every iteration.
summary_rows = []   # one single-row frame per feature set: "mean±std" strings + CM
mean_rows = []      # one single-row frame per feature set: mean strings only
for feature, suffix in zip(features,suffixes):
    print(feature, suffix)
    df = pd.read_csv(os.path.join(scorepath, f"score_{method1}_{tissue}_{problem}_{suffix}.csv"))
    # Every column except the confusion matrix is treated as a metric.
    numeric = df.drop(columns=['CM'])
    metrics = numeric.columns
    means, stds = numeric.mean(), numeric.std()
    row_df = pd.DataFrame([[f"{means[m]:.4f}±{stds[m]:.4f}" for m in metrics]], columns=metrics, index=[feature])
    row_df.index.name = 'feature'
    # Only the confusion matrix stored in the first row of the file is
    # reported; its line breaks are replaced so it fits in one table cell.
    row_df['CM'] = df['CM'].iloc[0].replace('\n', ',')
    summary_rows.append(row_df)
    mean_rows.append(pd.DataFrame([[f"{means[m]:.4f}" for m in metrics]], columns=metrics))

# Concatenate once; every row carries the same 'feature' index name, so the
# name is kept on the combined table.
df_p = pd.concat(summary_rows) if summary_rows else pd.DataFrame()
df_means = pd.concat(mean_rows) if mean_rows else pd.DataFrame()
df_means = df_means.set_index(pd.Index(data=features, name='feature'))

# --- Report and export -------------------------------------------------------
print(tabulate(df_p, headers=df_p.columns, tablefmt="latex"))
# df_means (means without std) is kept in memory but not written by this cell.
# Make sure the output directory exists so the export cannot fail on a fresh
# checkout.
os.makedirs(results_dir, exist_ok=True)
df_p.drop(columns=['CM']).to_csv(f"../results/{problem}{tissue}_sv.csv")

Bio bio
N2V n2v
Bio+CCBeder bioccbeder
Bio+CCcfs biocc
Bio+N2V bion2v
Bio+CCcfs+N2V bioccn2v
\begin{tabular}{llllllll}
\hline
               & ROC-AUC       & Accuracy      & BA            & Sensitivity   & Specificity   & MCC           & CM                             \\
\hline
 Bio           & 0.9140±0.0009 & 0.8274±0.0018 & 0.8414±0.0024 & 0.8579±0.0051 & 0.8250±0.0021 & 0.4234±0.0032 & [[ 1068   174], [ 2783 13211]] \\
 N2V           & 0.9339±0.0006 & 0.8681±0.0009 & 0.8593±0.0015 & 0.8491±0.0029 & 0.8695±0.0010 & 0.4814±0.0024 & [[ 1054   188], [ 2078 13916]] \\
 Bio+CCBeder   & 0.9522±0.0003 & 0.8848±0.0008 & 0.8887±0.0009 & 0.8934±0.0015 & 0.8841±0.0008 & 0.5328±0.0019 & [[ 1112   130], [ 1853 14141]] \\
 Bio+CCcfs     & 0.9625±0.0005 & 0.9050±0.0005 & 0.9053±0.0013 & 0.9056±0.0032 & 0.9050±0.0007 & 0.5816±0.0011 & [[ 1120   122], [ 1497 14497]] \\
 Bio+N2V       & 0.9529±0.0008 & 0.8829±0.0014 & 0.8912±0.0012 & 0.9009±0.0031 & 0.8815±0.0016 & 0.5327±0.0022 & [[ 1119   123], [ 1