In [None]:
import pandas as pd
import numpy as np
import math
import pylab
from pylab import *
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 300

import os

In [None]:
# Root output folder for the figures written by this notebook.
save_folder = './Supplimentary_Sup/'
# Value of the 'Dataset' column selected for analysis; the composite-score
# cells below only act when it is 'Immune_Human' or 'Lung'.
dataset = 'Immune_Human'

In [None]:
# Excel workbook with the benchmark metrics and the sheet read from it.
# NOTE(review): the path points into a user's Downloads folder — adjust it
# for the machine running the notebook.
excel_path = '~/Downloads/All_metrics_15_Sep.xlsx' 
sheet_name =  'all_metrics_revision'

In [None]:
# Metric columns that are min-max scaled before the composite scores are built.
columns_to_scale = [
    "NMI cluster/label",
    "ARI cluster/label",
    "ASW label",
    "ASW label/batch",
    "PCR batch",
    "isolated f1 score",
    "isolated silhouette coefficient",
    "graph connectivity",
    "kBET",
    "iLISI",
    "cLISI",
]

# Bar colour (hex) for every method name; used as the seaborn palette.
method_color_dct = dict([
    ('scVI', '#28DDED'),
    ('Harmony', '#ED7A28'),
    ('Seurat', '#994363'),
    ('BBKNN', '#B626D3'),
    ('Scanorama', '#EDBF28'),
    ('INSCT', '#286CED'),
    ('LIGER', '#90EE90'),
    ('fastMNN', "#FFB6C1"),
    ('iMAP', "#964B00"),
    ('scDREAMER', '#086E28'),
    ('scANVI', '#c5b0d5'),
    ('scGEN', '#d62829'),
    ('scDREAMER-Sup', '#113f0a'),
])

# Methods (index labels of the metrics table) kept for the plots, in plot order.
methods_to_plot = ['scGEN', 'scANVI', 'scDREAMER-Sup']

# Composite scores for the supervised methods

In [None]:
import seaborn as sns
def plot_bar_multi(df,col,method_color_dct,save_folder=None):
    """Draw a grouped bar chart of one metric and optionally save it as a PNG.

    Bars are grouped by ``Percentage_wrong`` on the x-axis and coloured by
    ``Method``. Each visible bar is annotated with its height rounded to two
    decimals. The figure is drawn without a legend and shown with ``plt.show()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'Percentage_wrong'``, ``'Method'`` and ``col``.
    col : str
        Column plotted on the y-axis. Also used as the y-axis label and, with
        ``'/'`` replaced by ``'_'``, as the PNG file name.
    method_color_dct : dict
        Mapping of method name to colour, passed to seaborn as ``palette``.
    save_folder : str, optional
        Folder the PNG is written to; it is created when missing. Nothing is
        saved when this is ``None`` or empty.
    """
    ax = sns.barplot(x='Percentage_wrong', y=col, hue='Method', data=df, palette=method_color_dct)
    plt.xticks(rotation=90)

    # The plot is published without a legend; drop whichever one was attached.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    for axis in (ax.xaxis, ax.yaxis):
        for tick in axis.get_major_ticks():
            tick.label1.set_fontsize(14)
            tick.label1.set_fontweight('bold')

    ax.set_ylabel(col, fontsize=15, fontname='Arial', fontweight='bold')
    ax.set_xlabel('Percentage of missing cell type labels', fontsize=15, fontname='Arial', fontweight='bold')

    for rect in ax.patches:
        height = rect.get_height()
        # Rectangles without a finite height or without any width are not
        # visible bars, so a value label on them would float on its own.
        if not np.isfinite(height) or rect.get_width() == 0:
            continue
        ax.text(
            rect.get_x() + rect.get_width() / 2,
            height,
            str(round(height, 2)),
            ha="center",
            va="bottom",
        )

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    if save_folder:
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(save_folder, exist_ok=True)
        # '/' in metric names would otherwise be read as a path separator.
        file_name = col.replace('/', '_') + '.png'
        plt.savefig(os.path.join(save_folder, file_name), transparent=True, bbox_inches='tight')
    plt.show()
    
from sklearn.preprocessing import MinMaxScaler
def scale_with_mini(df,min_values):
    """Min-max scale each column of ``df`` to the range ``[min_values[col], 1]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric columns to rescale. The frame is modified in place and is also
        returned.
    min_values : mapping
        Lower bound of the target range for every column, looked up by column
        name.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with every column rescaled.
    """
    for col in df.columns:
        scaler = MinMaxScaler(feature_range=(min_values[col], 1))
        # The scaler expects a 2-D (n_samples, 1) array for a single feature.
        scaled = scaler.fit_transform(df[col].to_numpy().reshape(-1, 1))
        # Replace the column with the 1-D float result instead of writing a
        # 2-D array into the existing values via .loc: in-place setting of
        # floats into an integer-typed column is something newer pandas
        # versions warn about (and are slated to reject).
        df[col] = scaled.ravel()
    return df
def scale(df):
    """Min-max scale every column of ``df`` with a default ``MinMaxScaler``.

    Returns whatever ``MinMaxScaler.fit_transform`` produces for ``df``
    (the scaled values, not a relabelled DataFrame).
    """
    return MinMaxScaler().fit_transform(df)

# Min-max scale the metrics over all methods of the dataset, then keep the methods to plot

In [None]:
# Load the metrics sheet and keep the rows of the selected dataset, indexed by
# method name (the 'Dataset' and 'Method' columns are not kept as data).
df = pd.read_excel(excel_path, sheet_name=sheet_name)
df_dataset = (
    df.loc[df['Dataset'] == dataset]
    .set_index('Method')
    .drop(columns=['Dataset'])
)

# Scale while every method of the dataset is still present, so the min/max
# are taken over all of them and not only over the plotted subset.
df_dataset.loc[:, columns_to_scale] = scale(df_dataset.loc[:, columns_to_scale])

# Keep only the methods that will be plotted, in plotting order.
df_dataset = df_dataset.loc[methods_to_plot, :]
df_dataset

# Alternative (disabled): scale the plotted methods to [column minimum over all methods, 1]

In [None]:
# df = pd.read_excel(excel_path,sheet_name =sheet_name)
# df_dataset = df[df['Dataset'] == dataset].reset_index(drop = True)
# df_dataset.index = df_dataset['Method']
# df_dataset.drop(['Dataset','Method'], inplace=True, axis=1)
# mini_values = df_dataset.loc[:,columns_to_scale].min(axis = 0)
# df_dataset = df_dataset.loc[methods_to_plot,:]
# df_dataset.loc[:,columns_to_scale] =scale_with_mini(df_dataset.loc[:,columns_to_scale],mini_values)
# df_dataset

# Alternative (disabled): min-max scale using only the plotted methods

In [None]:
# df = pd.read_excel(excel_path,sheet_name =sheet_name)
# df_dataset = df[df['Dataset'] == dataset].reset_index(drop = True)
# df_dataset.index = df_dataset['Method']
# df_dataset.drop(['Dataset','Method'], inplace=True, axis=1)
# df_dataset = df_dataset.loc[methods_to_plot,:]
# df_dataset.loc[:,columns_to_scale] =scale(df_dataset.loc[:,columns_to_scale])
# df_dataset

# Calculate and plot the composite scores

In [None]:
# Build the composite scores for the selected dataset and draw one bar chart
# per composite score. Nothing happens for datasets outside the list below.
if dataset in ['Immune_Human','Lung']:
    # Composite score -> columns averaged to obtain it. Order matters: the
    # combined score is the mean of two composites defined before it.
    composite_components = {
        'Composite bio-conservation score': ['NMI cluster/label', 'ARI cluster/label', 'ASW label'],
        'Composite batch-correction score': ['ASW label/batch', 'PCR batch', 'graph connectivity', 'kBET'],
        'Composite isolated label score': ['isolated silhouette coefficient', 'isolated f1 score'],
        'Combined composite score': ['Composite bio-conservation score', 'Composite batch-correction score'],
    }

    df_dataset['Percentage_wrong'] = df_dataset['Percentage_wrong'].astype('int32')

    # Round every float cell to 2 decimals before averaging. isinstance also
    # catches NumPy float scalars, which an exact `type(y) == float`
    # comparison silently leaves unrounded.
    df_dataset = df_dataset.apply(
        lambda column: column.apply(
            lambda value: round(value, 2) if isinstance(value, (float, np.floating)) else value
        )
    )

    df_dataset['color'] = pd.Series(df_dataset.index, index=df_dataset.index).replace(method_color_dct)
    df_dataset['Method'] = df_dataset.index  # plot_bar_multi uses 'Method' as hue

    for composite_name, component_columns in composite_components.items():
        df_dataset[composite_name] = df_dataset[component_columns].mean(axis=1)

    # os.path.join keeps the path valid whether or not save_folder ends in '/'.
    figure_folder = os.path.join(save_folder, dataset)
    for composite_name in (
        'Combined composite score',
        'Composite batch-correction score',
        'Composite bio-conservation score',
        'Composite isolated label score',
    ):
        plot_bar_multi(df_dataset, composite_name, method_color_dct, save_folder=figure_folder)

In [None]:
# scVI - cyan
# Harmony - orange
# Seurat - magenta
# BBKNN - purple
# Scanorama - yellow
# INSCT - blue
# iMAP - brown
# Liger - light green
# fastMNN - light pink
# scANVI - removed from main figure
# scDREAMER - Green
# scDREAMER++ - Red

# legend

In [None]:
# Draw one bar plot with the method legend placed to the right of the axes and
# save that figure (bars plus legend) as 'Sup_legend.png' inside save_folder.
df_dataset['Method'] = df_dataset.index
ax = sns.barplot(x='Percentage_wrong', y='NMI cluster/label', hue='Method', data=df_dataset, palette=method_color_dct)
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), prop={'family': "Arial", 'size': 14})
# Derive the path from save_folder instead of repeating the literal, and make
# sure the folder exists: this cell can run without any earlier cell having
# created it.
os.makedirs(save_folder, exist_ok=True)
plt.savefig(os.path.join(save_folder, 'Sup_legend.png'), transparent=True, bbox_inches='tight')

In [None]:
# Recompute the composite scores and reduce df_dataset to the composite
# columns plus 'Percentage_wrong'. Nothing happens for datasets outside the
# list below.
if dataset in ['Immune_Human','Lung']:
    # Composite score -> columns averaged to obtain it. Order matters: the
    # combined score is the mean of two composites defined before it.
    composite_components = {
        'Composite bio-conservation score': ['NMI cluster/label', 'ARI cluster/label', 'ASW label'],
        'Composite batch-correction score': ['ASW label/batch', 'PCR batch', 'graph connectivity', 'kBET'],
        'Composite isolated label score': ['isolated silhouette coefficient', 'isolated f1 score'],
        'Combined composite score': ['Composite bio-conservation score', 'Composite batch-correction score'],
    }

    df_dataset['Percentage_wrong'] = df_dataset['Percentage_wrong'].astype('int32')

    # Round every float cell to 2 decimals before averaging. isinstance also
    # catches NumPy float scalars, which an exact `type(y) == float`
    # comparison silently leaves unrounded.
    df_dataset = df_dataset.apply(
        lambda column: column.apply(
            lambda value: round(value, 2) if isinstance(value, (float, np.floating)) else value
        )
    )

    for composite_name, component_columns in composite_components.items():
        df_dataset[composite_name] = df_dataset[component_columns].mean(axis=1)

    # Only these columns are kept, so no helper columns ('color', 'Method')
    # are added here: they would be discarded by this selection anyway.
    df_dataset = df_dataset[[
        'Composite bio-conservation score',
        'Composite batch-correction score',
        'Composite isolated label score',
        'Combined composite score',
        'Percentage_wrong',
    ]]
    

In [None]:
# Write the current df_dataset table to the working directory, index included.
# NOTE: the file is tab-separated (sep='\t') even though its name ends in '.csv'.
df_dataset.to_csv(dataset+'_supervised_composite_scores.csv',sep='\t',index=True)