In [None]:
import math
import matplotlib as mpl
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pylab
from IPython.display import display
from pylab import *
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
import os

# Raise the default figure resolution to 300 dpi for every figure created below.
mpl.rcParams["figure.dpi"] = 300

In [None]:
# Workbook and worksheet holding the metrics table; both are handed to
# pd.read_excel when a dataset is loaded further down.
# NOTE(review): the path points into the current user's Downloads folder, so it
# has to be adjusted on any other machine.
excel_path = "~/Downloads/All_metrics_15_Mar_23.xlsx"
sheet_name = "All_Metrics"

In [None]:
# Metric columns that are min-max scaled (via `scale`) before the composite
# scores are computed from them.
columns_to_scale = [
    "NMI cluster/label",
    "ARI cluster/label",
    "ASW label",
    "ASW label/batch",
    "PCR batch",
    "isolated f1 score",
    "isolated silhouette coefficient",
    "graph connectivity",
    "kBET",
    "iLISI",
    "cLISI",
]
# Hex colour per method name. `calculate_composite` uses this mapping to fill
# the "color" column from the row index. The two "redo" entries reuse the
# colours of the corresponding original scDREAMER entries.
method_color_dct = {
    "scVI": "#28DDED",
    "Harmony": "#ED7A28",
    "Seurat": "#994363",
    "BBKNN": "#B626D3",
    "Scanorama": "#EDBF28",
    "INSCT": "#286CED",
    "LIGER": "#90EE90",
    "fastMNN": "#FFB6C1",
    "iMAP": "#964B00",
    "scDML": "#6F3AF9",
    "scDREAMER": "#086E28",
    "scANVI": "#c5b0d5",
    "scGEN": "#d62829",
    "scDREAMER-Sup": "#113f0a",
    "scDREAMER redo": "#086E28",
    "scDREAMER-Sup redo": "#113f0a",
}
# Row labels (method names) selected from the metrics table; a subset of the
# keys of `method_color_dct`.
methods_to_plot = [
    "scVI",
    "Harmony",
    "Seurat",
    "BBKNN",
    "Scanorama",
    "INSCT",
    "LIGER",
    "iMAP",
    "scDML",
    "scDREAMER",
]

In [None]:
import seaborn as sns


def plot_bar_multi(df, col, method_color_dct, save_folder=None):
    """Draw a grouped bar plot of ``col`` and optionally save it as a PNG.

    Bars are grouped along the x-axis by ``df["cell_type"]`` and split by
    ``df["epoch"]`` (seaborn ``hue``). Every drawn bar is annotated with its
    height rounded to two decimals. The figure is shown at the end.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``"cell_type"``, ``"epoch"`` and ``col``.
    col : str
        Column plotted on the y-axis. Also used as the y-axis label and, with
        every "/" replaced by "_", as the PNG file name.
    method_color_dct : dict
        Currently unused (no palette is passed to seaborn); kept in the
        signature so existing callers keep working.
    save_folder : str, optional
        Directory the PNG is written to; it is created when missing. Nothing
        is saved when this is ``None`` or empty.
    """
    ax = sns.barplot(x="cell_type", y=col, hue="epoch", data=df)

    # Legend sits outside the axes, vertically centred on the right edge.
    ax.legend(
        loc="center left",
        bbox_to_anchor=(1, 0.5),
        prop={"family": "Arial", "weight": "bold", "size": 14},
    )

    # Same bold 14pt styling for the tick labels of both axes.
    for axis in (ax.xaxis, ax.yaxis):
        for tick in axis.get_major_ticks():
            tick.label1.set_fontsize(14)
            tick.label1.set_fontweight("bold")

    ax.set_ylabel(col, fontsize=15, fontname="Arial", fontweight="bold")
    ax.set_xlabel(
        "Cell types present in training for epoch(s)",
        fontsize=15,
        fontname="Arial",
        fontweight="bold",
    )

    # Write the value on top of each bar.
    for rect in ax.patches:
        height = rect.get_height()
        # Skip patches that are not real bars: a missing cell_type/epoch
        # combination yields a non-finite height, and zero-width patches carry
        # no data (presumably helper artists that some seaborn versions add to
        # the axes -- verify against the installed version).
        if not math.isfinite(height) or rect.get_width() == 0:
            continue
        ax.text(
            rect.get_x() + rect.get_width() / 2,
            height,
            str(round(height, 2)),
            ha="center",
            va="bottom",
        )

    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)

    if save_folder:
        # exist_ok avoids the check-then-create race and keeps re-runs quiet.
        os.makedirs(save_folder, exist_ok=True)
        # os.path.join copes with a trailing separator in save_folder.
        target_path = os.path.join(save_folder, col.replace("/", "_") + ".png")
        ax.figure.savefig(target_path, transparent=True, bbox_inches="tight")
    plt.show()

def scale_with_mini(df, min_values):
    """Min-max scale every column of ``df`` to the range ``[min_values[col], 1]``.

    Each column gets its own ``MinMaxScaler`` whose lower bound is looked up in
    ``min_values`` under the column name. The scaled values are written back
    into ``df`` itself, so the caller's frame is modified; the same frame is
    returned.
    """
    for column_name in df.columns:
        lower_bound = min_values[column_name]
        column_scaler = MinMaxScaler(feature_range=(lower_bound, 1))
        # The scaler expects 2-D input, hence the single-column reshape.
        column_as_2d = np.array(df.loc[:, column_name]).reshape(-1, 1)
        df.loc[:, column_name] = column_scaler.fit_transform(column_as_2d)
    return df


def scale(df):
    """Fit a default ``MinMaxScaler`` on ``df`` and return the transformed data.

    The return value is whatever ``fit_transform`` produces (not the input
    frame); scikit-learn documents the default target range as ``(0, 1)``.
    """
    return MinMaxScaler().fit_transform(df)

# composite score main

In [None]:
from sklearn.preprocessing import MinMaxScaler


def _round_if_numeric(value, ndigits=2):
    """Round real numbers to ``ndigits`` places; return anything else unchanged."""
    if isinstance(value, (bool, np.bool_)):
        # Booleans are technically ints; rounding would turn True into 1.
        return value
    if isinstance(value, (int, float, np.number)):
        return round(value, ndigits)
    return value


def calculate_composite(df_dataset, color_map=None):
    """Add a colour column and the composite score columns, rounded to 2 places.

    The input frame is not modified; a new frame is returned.

    Added columns
    -------------
    ``color``
        Looked up from the row label in ``color_map``; labels without an entry
        keep the label itself as value.
    ``Composite bio-conservation score``
        Row mean of "NMI cluster/label", "ARI cluster/label", "ASW label".
    ``Composite batch-correction score``
        Row mean of "ASW label/batch", "PCR batch", "graph connectivity", "kBET".
    ``Composite isolated label score``
        Row mean of "isolated silhouette coefficient", "isolated f1 score".
    ``Combined composite score``
        Row mean of the (unrounded) bio-conservation and batch-correction
        composites.

    Parameters
    ----------
    df_dataset : pandas.DataFrame
        One row per method/run; must contain the metric columns listed above.
    color_map : dict, optional
        Mapping from row label to colour. Defaults to the notebook-level
        ``method_color_dct``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df_dataset`` with the extra columns; every real-number cell
        is rounded to two decimals, all other cells are left as they are.
    """
    if color_map is None:
        color_map = method_color_dct

    # Work on a copy so a sliced frame passed by the caller is never written to.
    result = df_dataset.copy()
    result["color"] = [color_map.get(label, label) for label in result.index]

    result["Composite bio-conservation score"] = result[
        ["NMI cluster/label", "ARI cluster/label", "ASW label"]
    ].mean(axis=1)
    result["Composite batch-correction score"] = result[
        ["ASW label/batch", "PCR batch", "graph connectivity", "kBET"]
    ].mean(axis=1)
    result["Composite isolated label score"] = result[
        ["isolated silhouette coefficient", "isolated f1 score"]
    ].mean(axis=1)
    result["Combined composite score"] = result[
        ["Composite bio-conservation score", "Composite batch-correction score"]
    ].mean(axis=1)

    # Element-wise so that mixed columns (e.g. strings next to numbers) survive.
    return result.apply(lambda column: column.apply(_round_if_numeric))


# Pancreas

In [None]:
## Knock-out experiment
dataset = "Pancreas"
output_dir = "./Pancreas_Head_Out/"
# Row labels of the knock-out runs: the first word becomes "cell_type", the
# optional second word becomes "epoch" further down.
held_out_runs = ["Alpha", "Alpha 50", "Alpha 150", "Delta", "Delta 50", "Delta 150"]
composite_columns = [
    "Composite bio-conservation score",
    "Composite batch-correction score",
    "Composite isolated label score",
    "Combined composite score",
]

# The CSV is written before any plot is saved, so the folder must exist up
# front; otherwise a fresh run fails at to_csv.
os.makedirs(output_dir, exist_ok=True)

df = pd.read_excel(excel_path, sheet_name=sheet_name)

# Rows of this dataset only, indexed by method name.
df_dataset = df[df["Dataset"] == dataset].set_index("Method").drop(columns=["Dataset"])

# Min-max scaling uses all rows of the dataset, i.e. it happens before the
# row filters below.
df_dataset.loc[:, columns_to_scale] = scale(df_dataset.loc[:, columns_to_scale])
df_dataset = df_dataset.loc[df_dataset["Percentage_wrong"] == 0]

methods_to_plot_held_out = methods_to_plot.copy()
methods_to_plot_held_out.extend(held_out_runs)
df_dataset = df_dataset.loc[methods_to_plot_held_out, :]

df_dataset = calculate_composite(df_dataset)
# Keep only the composite scores of the knock-out runs; copy so the columns
# added below are written to an independent frame.
df_dataset = df_dataset[composite_columns].loc[held_out_runs, :].copy()
display(df_dataset)
df_dataset.to_csv(
    os.path.join(output_dir, dataset + "_pancreas_composite_scores.csv"),
    sep="\t",
    index=True,
)

# Split "<cell type> <epoch>" row labels; labels without a second word get epoch 0.
run_label_parts = [label.split(" ") for label in df_dataset.index]
df_dataset["cell_type"] = [parts[0] for parts in run_label_parts]
df_dataset["epoch"] = [parts[1] if len(parts) > 1 else 0 for parts in run_label_parts]

for score_column in (
    "Combined composite score",
    "Composite bio-conservation score",
    "Composite batch-correction score",
):
    plot_bar_multi(
        df_dataset,
        score_column,
        method_color_dct,
        save_folder=output_dir,
    )