In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display
from pylab import *

# Render every figure produced in this notebook at 200 dots per inch.
mpl.rcParams.update({"figure.dpi": 200})

In [None]:
# Root directory for exported figures. The plotting cells append the dataset
# name directly to this string, so the trailing slash must be kept.
save_folder: str = "./Supplimentary/"

# raw score

In [None]:
# Workbook and worksheet that the dataset cells pass to pd.read_excel.
excel_path: str = "~/Downloads/All_metrics_15_Mar_23.xlsx"
sheet_name: str = "All_Metrics"
# Left disabled on purpose:
# pd.options.mode.use_inf_as_na = True

In [None]:
# Sheet columns that get one bar chart each, in plotting order.
# (The variable's spelling is kept because the dataset cells reference it.)
cloumns_to_plot = [
    "NMI cluster/label", "ARI cluster/label",
    "ASW label", "ASW label/batch",
    "PCR batch",
    "isolated f1 score", "isolated silhouette coefficient",
    "graph connectivity",
    "kBET",
]
# Bar colour (hex) for each integration method, keyed by the method name as it
# appears in the sheet's "Method" column.
method_color_dct = dict(
    [
        ("scVI", "#28DDED"),
        ("Harmony", "#ED7A28"),
        ("Seurat", "#994363"),
        ("BBKNN", "#B626D3"),
        ("Scanorama", "#EDBF28"),
        ("INSCT", "#286CED"),
        ("LIGER", "#90EE90"),
        ("fastMNN", "#FFB6C1"),
        ("iMAP", "#964B00"),
        ("scDML", "#6F3AF9"),
        ("scDREAMER", "#086E28"),
        ("scANVI", "#c5b0d5"),
        ("scGEN", "#d62829"),
        ("scDREAMER-Sup", "#113f0a"),
    ]
)
# Methods charted by default, in bar order. "scDREAMER-Sup", "scANVI" and
# "scGEN" are deliberately left out of this default list.
methods_to_plot = [
    "scVI", "Harmony", "Seurat", "BBKNN", "Scanorama",
    "INSCT", "LIGER", "iMAP", "scDML", "scDREAMER",
]

In [None]:
# directly downloaded from our sheet

In [None]:
import os


def plot_bar(df_, col_name, save_folder=False):
    """Draw one bar per row of ``df_`` for the metric ``col_name``.

    Rows whose ``col_name`` value is missing are skipped. Each bar takes its
    colour from the row's ``"color"`` entry and is annotated with its value
    rounded to two decimals. The figure is shown with ``plt.show()`` and, when
    ``save_folder`` is truthy, also written to
    ``<save_folder>/<col_name with "/" replaced by "_">.png``.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must contain ``col_name`` and a ``"color"`` column; its index supplies
        the x tick labels.
    col_name : str
        Column to plot; also used as the y-axis label and the file stem.
    save_folder : str or False, optional
        Output directory (created when missing). Falsy values disable saving.

    Notes
    -----
    Sets the global ``axes.linewidth`` rc parameter to 2 as a side effect.
    """
    # Use pyplot explicitly instead of names leaked by ``from pylab import *``.
    plt.rc("axes", linewidth=2)

    plotted = df_.loc[df_[col_name].notna(), :]
    if plotted.empty:
        # With no rows left there are no bars and min()/max() are NaN, so the
        # plotting and limit code below has nothing valid to work with.
        print(f"No values to plot for {col_name!r}; skipping.")
        return

    fig, ax = plt.subplots(figsize=(6, 4))
    plotted[col_name].plot(kind="bar", color=plotted["color"], ax=ax)

    ax.set_xticklabels(
        plotted.index,
        rotation=75,
        fontname="Arial",
        fontsize=14,
        fontweight="bold",
    )

    # Write each (rounded) value just above its bar.
    for bar, value in zip(ax.patches, plotted[col_name]):
        ax.text(
            bar.get_x() + bar.get_width() / 2,
            bar.get_height(),
            round(value, 2),
            ha="center",
            va="bottom",
        )

    # Zoom the y axis onto the observed range, clamped to [0, 1].
    lowest = plotted[col_name].min()
    highest = plotted[col_name].max()
    lower = max(lowest - 0.01, 0)
    upper = min(highest * 1.05, 1.0)
    if lower < upper:
        ax.set_ylim(lower, upper)
    # Otherwise (e.g. all values are 0 or lie outside [0, 1]) keep the
    # autoscaled limits rather than requesting an inverted or empty range.

    ax.set_ylabel(col_name, fontsize=15, fontname="Arial", fontweight="bold")
    ax.set_xlabel("")  # drop the index name pandas would show under the axis
    for tick_label in ax.get_yticklabels():
        tick_label.set_fontsize(14)
        tick_label.set_fontweight("bold")

    for side in ("right", "top"):
        ax.spines[side].set_visible(False)

    fig.tight_layout()

    if save_folder:
        if not os.path.exists(save_folder):
            print(save_folder)  # announce a newly created output folder
        os.makedirs(save_folder, exist_ok=True)
        fig.savefig(
            os.path.join(save_folder, col_name.replace("/", "_") + ".png"),
            transparent=True,
            bbox_inches="tight",
        )

    plt.show()


def plot_bar_all(
    df_, col_name, save_folder=False, unsup_x=3, sup_x=9.8, n_supervised=3
):
    """Bar chart of ``col_name`` with an unsupervised/supervised split.

    Like ``plot_bar`` but the x tick labels are hidden; instead a dashed
    vertical line separates the last ``n_supervised`` bars from the rest, and
    the captions "Unsupervised" and "Supervised" are written below the axis.
    Rows whose ``col_name`` value is missing are skipped. The figure is shown
    with ``plt.show()`` and, when ``save_folder`` is truthy, also written to
    ``<save_folder>/<col_name with "/" replaced by "_">.png``.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must contain ``col_name`` and a ``"color"`` column. The supervised
        methods are expected to be the trailing rows.
    col_name : str
        Column to plot; also used as the y-axis label and the file stem.
    save_folder : str or False, optional
        Output directory (created when missing). Falsy values disable saving.
    unsup_x, sup_x : float, optional
        X positions (data coordinates) at which the "Unsupervised" and
        "Supervised" captions start.
    n_supervised : int, optional
        Number of trailing bars that belong to the supervised group. The
        default of 3 reproduces the previously hard-coded separator position.

    Notes
    -----
    Sets the global ``axes.linewidth`` rc parameter to 2 as a side effect.
    """
    # Use pyplot explicitly instead of names leaked by ``from pylab import *``.
    plt.rc("axes", linewidth=2)

    plotted = df_.loc[df_[col_name].notna(), :]
    if plotted.empty:
        # With no rows left there are no bars and min()/max() are NaN, so the
        # plotting and limit code below has nothing valid to work with.
        print(f"No values to plot for {col_name!r}; skipping.")
        return

    fig, ax = plt.subplots(figsize=(6, 4))
    plotted[col_name].plot(kind="bar", color=plotted["color"], ax=ax)

    # Hide x ticks and labels; the group captions replace them.
    ax.tick_params(labelbottom=False, bottom=False)

    bars = ax.patches
    # Write each (rounded) value just above its bar.
    for bar, value in zip(bars, plotted[col_name]):
        ax.text(
            bar.get_x() + bar.get_width() / 2,
            bar.get_height(),
            round(value, 2),
            ha="center",
            va="bottom",
        )

    # Zoom the y axis onto the observed range padded by 5 %, clamped to [0, 1].
    lowest = plotted[col_name].min()
    highest = plotted[col_name].max()
    pad = 0.05 * (highest - lowest)
    lower = max(lowest - pad, 0)
    upper = min(highest + pad, 1.0)
    if lower < upper:
        y_low, y_high = ax.set_ylim(lower, upper)
    else:
        # Degenerate range (e.g. a single distinct value): keep autoscaling.
        y_low, y_high = ax.get_ylim()
    y_span = y_high - y_low

    # The separator needs at least one bar on each side of the split; with
    # fewer bars there is no meaningful split, so it and the captions are
    # skipped instead of indexing past the start of ``bars``.
    if 0 < n_supervised < len(bars):
        last_unsup = bars[-(n_supervised + 1)]
        first_sup = bars[-n_supervised]
        sep_line_x = (
            last_unsup.get_x() + last_unsup.get_width() + first_sup.get_x()
        ) / 2
        # Dashed line extending slightly beyond the axes (hence clip_on=False).
        ax.plot(
            (sep_line_x, sep_line_x),
            (y_low - 0.05 * y_span, y_high + 0.05 * y_span),
            color="black",
            linestyle="--",
            clip_on=False,
        )

        # Group captions, placed just below the bottom of the axis.
        dy_legend = 0.06
        caption_y = y_low - y_span * dy_legend
        ax.text(
            unsup_x,
            caption_y,
            "Unsupervised",
            fontsize=13,
            fontname="Arial",
            weight="bold",
        )
        ax.text(
            sup_x,
            caption_y,
            "Supervised",
            fontsize=13,
            fontname="Arial",
            weight="bold",
            color="#000080",
        )

    ax.set_ylabel(col_name, fontsize=15, fontname="Arial", fontweight="bold")
    ax.set_xlabel("")  # drop the index name pandas would show under the axis
    for tick_label in ax.get_yticklabels():
        tick_label.set_fontsize(14)
        tick_label.set_fontweight("bold")

    for side in ("right", "top"):
        ax.spines[side].set_visible(False)

    fig.tight_layout()

    if save_folder:
        if not os.path.exists(save_folder):
            print(save_folder)  # announce a newly created output folder
        os.makedirs(save_folder, exist_ok=True)
        fig.savefig(
            os.path.join(save_folder, col_name.replace("/", "_") + ".png"),
            transparent=True,
            bbox_inches="tight",
        )

    plt.show()

In [None]:
# Per-metric bar charts for the Pancreas dataset.
dataset = "Pancreas"
df = pd.read_excel(excel_path, sheet_name=sheet_name)

# Rows of this dataset with Percentage_wrong == 0, indexed by method and
# restricted to the selected methods/metrics. Built as one chain of new frames
# instead of dropping columns in place on a filtered slice.
df_dataset = (
    df.loc[(df["Dataset"] == dataset) & (df["Percentage_wrong"] == 0)]
    .set_index("Method")
    .loc[methods_to_plot, cloumns_to_plot]
    .round(2)
)
# Look up each method's bar colour; "color" must stay the last column because
# the loop below plots every column except the last.
df_dataset["color"] = df_dataset.index.map(method_color_dct)

display(df_dataset)
for metric in df_dataset.columns[:-1]:
    print(metric, dataset)
    plot_bar(df_dataset, metric, save_folder=save_folder + dataset)

In [None]:
# Per-metric bar charts for the Lung dataset.
dataset = "Lung"
df = pd.read_excel(excel_path, sheet_name=sheet_name)

# Rows of this dataset with Percentage_wrong == 0, indexed by method and
# restricted to the selected methods/metrics. Built as one chain of new frames
# instead of dropping columns in place on a filtered slice.
df_dataset = (
    df.loc[(df["Dataset"] == dataset) & (df["Percentage_wrong"] == 0)]
    .set_index("Method")
    .loc[methods_to_plot, cloumns_to_plot]
    .round(2)
)
# Look up each method's bar colour; "color" must stay the last column because
# the loop below plots every column except the last.
df_dataset["color"] = df_dataset.index.map(method_color_dct)

display(df_dataset)
for metric in df_dataset.columns[:-1]:
    print(metric, dataset)
    plot_bar(df_dataset, metric, save_folder=save_folder + dataset)

In [None]:
# Per-metric bar charts for the Immune_Human dataset.
dataset = "Immune_Human"
df = pd.read_excel(excel_path, sheet_name=sheet_name)

# Rows of this dataset with Percentage_wrong == 0, indexed by method and
# restricted to the selected methods/metrics. Built as one chain of new frames
# instead of dropping columns in place on a filtered slice.
df_dataset = (
    df.loc[(df["Dataset"] == dataset) & (df["Percentage_wrong"] == 0)]
    .set_index("Method")
    .loc[methods_to_plot, cloumns_to_plot]
    .round(2)
)
# Look up each method's bar colour; "color" must stay the last column because
# the loop below plots every column except the last.
df_dataset["color"] = df_dataset.index.map(method_color_dct)

display(df_dataset)
for metric in df_dataset.columns[:-1]:
    print(metric, dataset)
    plot_bar(df_dataset, metric, save_folder=save_folder + dataset)

In [None]:
# Per-metric bar charts for the Human_Retina dataset, including the three
# supervised methods (drawn with plot_bar_all's unsupervised/supervised split).
dataset = "Human_Retina"
df = pd.read_excel(excel_path, sheet_name=sheet_name)

# Bar order matters: plot_bar_all treats the trailing bars as supervised.
methods_to_plot_hr = [
    "scVI",
    "Harmony",
    "Seurat",
    "BBKNN",
    "Scanorama",
    "INSCT",
    "LIGER",
    "iMAP",
    "scDML",
    "scDREAMER",
    "scDREAMER-Sup",
    "scANVI",
    "scGEN",
]

# Rows of this dataset with Percentage_wrong == 0, indexed by method and
# restricted to the selected methods/metrics. Built as one chain of new frames
# instead of dropping columns in place on a filtered slice.
df_dataset = (
    df.loc[(df["Dataset"] == dataset) & (df["Percentage_wrong"] == 0)]
    .set_index("Method")
    .loc[methods_to_plot_hr, cloumns_to_plot]
    .round(2)
)
# Look up each method's bar colour; "color" must stay the last column because
# the loop below plots every column except the last.
df_dataset["color"] = df_dataset.index.map(method_color_dct)

display(df_dataset)

# Metrics plotted with the "Supervised" caption at x = 8.8 rather than at
# plot_bar_all's default position.
metrics_with_shifted_sup_caption = {
    "ASW label",
    "ASW label/batch",
    "PCR batch",
    "isolated silhouette coefficient",
}
for metric in df_dataset.columns[:-1]:
    print(metric, dataset)
    caption_kwargs = (
        {"sup_x": 8.8} if metric in metrics_with_shifted_sup_caption else {}
    )
    plot_bar_all(
        df_dataset, metric, save_folder=save_folder + dataset, **caption_kwargs
    )

In [None]:
# Per-metric bar charts for the Human_Mouse dataset, which is plotted without
# Seurat and without the two "isolated ..." metrics.
dataset = "Human_Mouse"
df = pd.read_excel(excel_path, sheet_name=sheet_name)

# Derive the reduced selections without mutating the shared default lists.
methods_to_plot_hm = [
    method for method in methods_to_plot if method != "Seurat"
]
cloumns_to_plot_hm = [
    column
    for column in cloumns_to_plot
    if column not in ("isolated f1 score", "isolated silhouette coefficient")
]

# Rows of this dataset with Percentage_wrong == 0, indexed by method and
# restricted to the selected methods/metrics. Built as one chain of new frames
# instead of dropping columns in place on a filtered slice.
df_dataset = (
    df.loc[(df["Dataset"] == dataset) & (df["Percentage_wrong"] == 0)]
    .set_index("Method")
    .loc[methods_to_plot_hm, cloumns_to_plot_hm]
    .round(2)
)
# Look up each method's bar colour; "color" must stay the last column because
# the loop below plots every column except the last.
df_dataset["color"] = df_dataset.index.map(method_color_dct)

display(df_dataset)
for metric in df_dataset.columns[:-1]:
    print(metric, dataset)
    plot_bar(df_dataset, metric, save_folder=save_folder + dataset)

In [None]:
# scVI - cyan
# Harmony - orange
# Seurat - magenta
# BBKNN - purple
# Scanorama - yellow
# INSCT - blue
# iMAP - brown
# LIGER - light green
# fastMNN - light pink
# scANVI - removed from main figure
# scDREAMER - Green
# scDREAMER++ - Red