In [None]:
import sys
import subprocess

#import pkg_resources
#required = {'harmonypy','sklearn','scanpy','pandas', 'numpy', 'bbknn', 'scipy', 'matplotlib', 'seaborn' ,'scipy'}
#installed = {pkg.key for pkg in pkg_resources.working_set}
#missing = required - installed
#if missing:
#    print("Installing missing packages:" )
#    print(missing)
#    python = sys.executable
#    subprocess.check_call([python, '-m', 'pip', 'install', *missing], stdout=subprocess.DEVNULL)

%matplotlib inline
from collections import Counter
from collections import defaultdict
import scanpy as sc
import pandas as pd
import pickle as pkl
import numpy as np
from bbknn import bbknn
import scipy
import matplotlib.pyplot as plt
import re
import glob
import os
import sys
from geosketch import gs
from numpy import cov
import scipy.cluster.hierarchy as spc
import seaborn as sns; sns.set(color_codes=True)
from sklearn.linear_model import LogisticRegression
import sklearn
import harmonypy as hm
from pathlib import Path

# Global scanpy settings used by every later cell.
sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
# Default figure parameters for scanpy plots: 80 dpi and the viridis colour map.
sc.settings.set_figure_params(dpi=80, color_map='viridis')

# LR script

# Only block to edit in LR script

In [None]:
# User configuration: the values below are read by the later cells of this notebook.
# Note that this script expects raw data to be in the non-batch-corrected adata.raw.X.

# Required: directory that figures and tables are saved to
save_path = "/home/jovyan/mount_farm/nfs/ar32/YS/Cite_Seq/add_new_meta_from_issac/LR_outs_YS_against_YS_citeseq_new_anno_20220401/"

# Required: name of the first object (appended to every label of cat1 as a dataset tag)
data1 = "YS_main_object_training"
# Path to object 1 // landscape/training data
#Object1 = "/home/jovyan/YS_project/YS/Data_objects/YS_object_14092021/YS_with_new_meta_20210919.h5ad"
Object1 = "/home/jovyan/YS_project/YS/Data_objects/final_objects/A4_V7_YS_integrated_data_singlets_with_raw_counts_for_MS_plotting_20211111_with_obsp.h5ad"
# .obs column of object 1 holding the labels to compare between datasets
cat1 = "broad_cell.labels"

# Required: name of the second object (appended to every label of cat2 as a dataset tag)
data2 = "YS_CiteSeq_RNA_mito_removed"
# Path to object 2 // prediction/projection data
Object2 = "/home/jovyan/mount_farm/nfs/ar32/YS/Cite_Seq/add_new_meta_from_issac/cite_seq_rna_simple_raw_20220331.h5ad"
# .obs column of object 2 holding the labels to compare between datasets
cat2 = "broad_anno"


# Required: LR model options
penalty='l2' # can be one of ["l1","l2","elasticnet"]
# Passed to LogisticRegression as C
sparcity=0.2
max_iter = 1000 # Increase if the solver reports that max_iter was reached
l1_ratio = 0.5 # If using elasticnet, this controls the ratio between l1 and l2

# Optional: Batch correction options (this is for correction of the eventual combined dataset of data1 and data2)
# If you do not have a batch variable for either data1 or data2, please add a "filler" column in the relevant adata.obs
# for the purposes of the batch_correction and batch args below.
# e.g., adata.obs["whatever"] = "something"; batch="whatever"
# NOTE: the later cells compare this value against the strings "Harmony", "BBKNN" and "False".
batch_correction = "Harmony" # Will accept "Harmony", "BBKNN" or "False" as options
batch = ["lanes", "orig.ident"] # One batch categorical per dataset: position 1 is for data1, position 2 is for data2

# Optional: miscellaneous options
# subsample_train: subsample the training data per label of cat1, which corrects for proportional differences
# between cell type labels in the training data. E.g., if the training data has 50,000 B cells, 20,000 T cells and
# 100 HSCs, subsampling to the smallest population keeps 100 cells per cell type.
subsample_train = False
subsample_prop = 0.2 # Proportion to subsample each label to (e.g. 0.2); if the string "NA" is given, subsamples to the smallest population
# Same two options, applied to the prediction data per label of cat2
subsample_predict = False
subsample_prop_predict = 0.5
# If True, only genes flagged as highly variable in the combined data are kept before PCA
remove_non_high_var = True

train_x = 'X_pca' # Resource to train and predict on: 'X' for gene expression, or a key of adata.obsm such as 'X_pca'
remove_effect_of_custom_gene_list = '' # "./cell_cycle_genes.csv" # Optional .csv of genes to remove from the variable genes before PCA. The first column is read as the gene list; any column name is fine.
use_raw = True # Do you want to use adata.raw.X (recommended)

# Rest of LR script

## Combine and preprocess data

In [None]:
%%time
# Make sure the output directory exists before anything is written to it
os.makedirs(save_path, exist_ok=True)

# Both .h5ad inputs must be existing files; fail early with a clear error otherwise
if Path(Object1).is_file() and Path(Object2).is_file():
    print("adata file paths detetcted, proceeding to load")
    adata = sc.read(Object1)
    adata2 = sc.read(Object2)
    # Discard the unstructured annotations (.uns) of both objects before they are combined
    del adata.uns
    del adata2.uns
else:
    raise FileNotFoundError("one or more .h5ad paths cannot be accessed")

# Point scanpy's figure directory at save_path.
# (Instantiating a new ScanpyConfig object has no effect on the active settings,
# so the attribute on the live `sc.settings` object is assigned instead.)
sc.settings.figdir = save_path

# Combine and pre-process data to match correlations across PCA

def _restore_raw_counts(ad, label):
    """Return `ad` with its .X taken from .raw.X where a raw partition exists.

    ad    -- AnnData-like object to process
    label -- name used in the progress messages ("adata" / "adata2")

    If `.raw.X` can be assigned to `.X` it is assigned in place; if the assignment
    fails (e.g. because the shapes differ) the object is rebuilt from `.raw`.
    If there is no raw partition, one is created from the current object.
    """
    if hasattr(ad.raw, "X"):
        try:
            ad.X = ad.raw.X
            print('no mismatch in shape for ' + label + ' detected')
        except Exception as err:
            # Only ordinary errors are handled here; KeyboardInterrupt / SystemExit still propagate
            print(label + ".X shape mismatched with " + label + ".raw.X, proceeding to re-build data (" + str(err) + ")")
            ad = ad.raw.to_adata()
    else:
        print("no raw data detected in " + label + "! proceeding to create raw partition from " + label + ".X")
        ad.raw = ad
    return ad

# Module to detect shape mismatch and alternatively rebuild adata
if use_raw == True:
    print('option detected to use raw data, proceeding to check if raw exists and if it matches data.X')
    adata = _restore_raw_counts(adata, "adata")
    adata2 = _restore_raw_counts(adata2, "adata2")
           
# Define intersecting genes between datasets.
# The shared genes are listed in the order they appear in adata.var (duplicates dropped),
# so the gene order is identical on every run; a plain set intersection has no stable order.
adata_genes = list(adata.var.index)
adata2_genes = list(adata2.var.index)
adata2_gene_set = set(adata2_genes)
keep_SC_genes = list(dict.fromkeys(gene for gene in adata_genes if gene in adata2_gene_set))
print("keep gene list = " , len(keep_SC_genes), "adata1 gene length = ", len(adata_genes) , "adata2 gene length = ", len(adata2_genes) )

# Remove non-intersecting genes (this step will remove cite-seq data if training data is pure RNA seq)
adata_intersect1 = adata[:, keep_SC_genes]
adata = adata_intersect1
adata_intersect2 = adata2[:, keep_SC_genes]
adata2 = adata_intersect2

# Optional subsampling of the training data (adata, per label of cat1)
# and of the prediction data (adata2, per label of cat2)

SUBSAMPLE_SEED = 0  # fixed seed so that re-running the notebook selects the same cells

def _subsample_cells(ad, groupby, prop, random_state=SUBSAMPLE_SEED):
    """Return a subset of `ad` subsampled per label of the obs column `groupby`.

    prop -- fraction of cells to keep within every label; if it is the string "NA"
            (or None) every label is instead cut down to the size of the smallest
            non-empty label.
    random_state -- seed forwarded to the pandas sampling calls.
    """
    obs = ad.obs
    if not (prop is None or prop == "NA"):
        print("option to subsample by proportion chosen")
        # group_keys=False keeps the original obs names as the index of the result
        sampled = obs.groupby(groupby, observed=True, group_keys=False).apply(
            lambda grp: grp.sample(frac=prop, random_state=random_state)
        )
        keep = sampled.index
    else:
        print("subsample by smallest population")
        label_sizes = obs[groupby].value_counts()
        # Unused categories have a count of 0 and must not drive the minimum
        smallest = int(label_sizes[label_sizes > 0].min())
        shuffled = obs.sample(frac=1, random_state=random_state)
        keep = shuffled.groupby(groupby, observed=True).head(smallest).index
    return ad[ad.obs.index.isin(keep)]

if subsample_train == True:
    adata = _subsample_cells(adata, cat1, subsample_prop)

# The prediction data is sized from its own labels (adata2 / cat2)
if subsample_predict == True:
    adata2 = _subsample_cells(adata2, cat2, subsample_prop_predict)

# Create a common batch column and do simple sanity check for batch variables.
# Batch correction counts as disabled for the string "False" as well as for False / None.
batch_correction_requested = batch_correction not in (False, None, "False")
if batch_correction_requested:
    # One batch categorical is needed per dataset: batch[0] for adata, batch[1] for adata2
    if len(batch) < 2:
        raise ValueError("Batch correction option detected but requires at least one categorical for each dataset!")
    print("Batch correction option detected, proceeding to format batch variables")
    batch_var = "lr_batch"
    adata.obs[batch_var] = adata.obs[batch[0]]
    adata2.obs[batch_var] = adata2.obs[batch[1]]

# Create a common obs column in both datasets containing the data origin tag:
# every label is converted to a string and suffixed with the dataset name (data1 / data2).
common_cat = "corr_concat" 
adata.obs[common_cat] = adata.obs[cat1].astype(str) + data1
adata2.obs[common_cat] = adata2.obs[cat2].astype(str) + data2
# Every .obs column of both objects is cast to categorical before concatenation
adata.obs = adata.obs.astype('category')
adata2.obs = adata2.obs.astype('category')
# Prediction data first, training data second; join='inner' and index_unique=None are passed,
# so no suffix is requested for the obs names.
# NOTE(review): obs names shared by both objects would presumably stay duplicated - confirm they are unique.
concat = adata2.concatenate(adata, join='inner',index_unique=None, batch_categories=None)
# From here on `adata` refers to the combined object
adata = concat[:]
# Normalise every cell to 1e4 counts, then log1p-transform
sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e4)
sc.pp.log1p(adata)
# Flag highly variable genes using the mean-expression bounds given here
sc.pp.highly_variable_genes(adata, min_mean=0.1, max_mean=4)
# Scale without zero-centering
sc.pp.scale(adata, zero_center=False, max_value=None, copy=False) #zero_center=True (densifies output)

# Optionally remove genes of known confounding effect from variable list
if Path(remove_effect_of_custom_gene_list).is_file():
    print("Custom gene removal list detected, proceeding to remove intersect from variable genes")
    # The first column of the .csv is the gene list
    regress_list = pd.read_csv(remove_effect_of_custom_gene_list)
    regress_list = regress_list.iloc[:, 0]
    # Assign the boolean False through .loc: a chained assignment may write to a temporary copy,
    # and the string "False" would corrupt the boolean column (a non-empty string is truthy).
    adata.var.loc[adata.var.index.isin(regress_list), "highly_variable"] = False
else:
    print("Custom gene list option is not selected or path is not readbale, proceeding with no variable removal")

# Optionally remove genes that do not contribute to variance in the combined data.
# Use only if training and predicting with dimensionality-reduced data.
if remove_non_high_var == True:
    highly_variable_mask = adata.var["highly_variable"].astype(bool).values
    adata = adata[:, highly_variable_mask]

# Now compute PCA (50 components, restricted to the genes flagged as highly variable)
sc.pp.pca(adata, n_comps=50, use_highly_variable=True, svd_solver='arpack')

# Batch correction options
# A neighbour graph on the uncorrected PCs is built unless batch_correction is the string "False".
# NOTE(review): a boolean False is not equal to the string "False", so it also passes this test.
# NOTE(review): the Harmony branch below recomputes the neighbours after correction.
if not(batch_correction == "False"):
    sc.pp.neighbors(adata, n_neighbors=15, n_pcs=50)    
if(batch_correction == "Harmony"):
    print("Commencing harmony")
    # Create hm subset (a full slice of the combined object)
    adata_hm = adata[:]
    # Set harmony variables: PCA coordinates, obs metadata and the batch column to correct for
    # (batch_var is assigned in the batch-column step earlier in this cell)
    data_mat = np.array(adata_hm.obsm["X_pca"])
    meta_data = adata_hm.obs
    vars_use = [batch_var]
    # Run Harmony
    ho = hm.run_harmony(data_mat, meta_data, vars_use)
    # Z_corr is transposed before being written back
    # (assumes Z_corr is components x cells - TODO confirm for the installed harmonypy version)
    res = (pd.DataFrame(ho.Z_corr)).T
    res.columns = ['X{}'.format(i + 1) for i in range(res.shape[1])]
    # Insert coordinates back into object: the uncorrected PCs are kept under "X_pca_back",
    # the corrected ones replace "X_pca"
    adata_hm.obsm["X_pca_back"]= adata_hm.obsm["X_pca"][:]
    adata_hm.obsm["X_pca"] = np.array(res)
    # Run neighbours on the corrected coordinates
    sc.pp.neighbors(adata_hm, n_neighbors=15, n_pcs=50)
    adata = adata_hm[:]
    del adata_hm
elif(batch_correction == "BBKNN"):
    print("Commencing BBKNN")
    # NOTE(review): the accepted keyword arguments depend on the installed bbknn version - confirm
    sc.external.pp.bbknn(adata, batch_key=batch_var, approx=True, metric='angular', copy=False, n_pcs=50, trim=None, n_trees=10, use_faiss=True, set_op_mix_ratio=1.0, local_connectivity=15) 
    
print("adata1 and adata2 are now combined and preprocessed in 'adata' obj - success!")

## Logistic regression function to train data set and transfer labels

In [None]:
# This function requires substantial compute power and will take a while to run

#def LR_compare(adata, train_x, train_label, subset_predict, subset_train, penalty=penalty, sparcity=sparcity, 
#               col_name='predicted'):
#
#    # adata - training+prediction adata object (combined). Pre-processed already
#    # sparsity - larger sparsity, more bins, more conservative predictions, less accurate. Low sparist for clean output
#                # A value of 0.2 is reasonable for L2 ridge regression
#    # penalty - acts as buffer for assigning bins too harshly
#    # train_x - arg refers to where you would like to derive your training reference from, i.e., GEX (X) or/elif.
#                # PCA/UMAP in obsm. The two 'if' statements below handle train_x differently based on this
#                # Based on train_x, the loops below compute 'train_label' (cell type values in training/landscape data) 
#                # and 'predict_x'(prediction data equivalent of train_x)
#    # train_label - cell type values in training/landscape data
#    # subset_predict - mandatory subset of predict_x which contains metadata for expression
#    # subset_train - mandatory subset of train_x which contains metadata for expression
#    
#    # Redefine LR parameters 'penalty' and 'sparsity' if you would like to deviate from defaults set above
#    
#    # Assign 'lr' as sklearn logistic regression func, with penalty and sparsity defined above
#    lr = LogisticRegression(penalty = penalty, C = sparcity, max_iter =  max_iter)
#    
#    if (penalty == "l1"):
#        lr = LogisticRegression(penalty = penalty, C = sparcity, max_iter =  max_iter, dual = True, solver = 'liblinear')
#    if (penalty == "elasticnet"):
#        lr = LogisticRegression(penalty = penalty, C = sparcity, max_iter =  max_iter, dual=False,solver = 'saga',l1_ratio=l1_ratio)
#
#    if train_x == 'X':
#        # Define training parameters
#        train_label = adata.obs[common_cat].values
#        train_label = train_label[subset_train]
#        #train_x = adata.X,
#        # Define prediction parameters
#        #predict_x = train_x
#        #train_x = train_x[subset_train, :] # issue line! subset_train = np.array(adata.obs[common_cat].isin(group1))
#                                           # group1 = (adata.obs[common_cat][adata.obs[common_cat].str.contains(Data1_group)]).unique()
#                                           # Data1_group = data1 = healthy skin data , adata containing subsetting data to get metadata for expression prediction
#                                           # adata.X = adata.X[np.array(adata.obs[common_cat].isin(group1)), :]
#                                           # train_x = train_x[adata.obs[common_cat].isin(group1)]
#                        
#        #predict_x = train_x
#        #predict_x = predict_x[subset_predict]
#        train_x = adata.X[adata.obs.index.isin(list(adata.obs[subset_train].index))]
#        predict_x = adata.X[adata.obs.index.isin(list(adata.obs[subset_predict].index))]
#
#    elif train_x in adata.obsm.keys():
#        # Define training parameters
#        train_label = adata.obs[common_cat].values
#        train_label = train_label[subset_train]
#        train_x = adata.obsm[train_x]
#        predict_x = train_x
#        train_x = train_x[subset_train, :]
#        # Define prediction parameters
#        predict_x = predict_x[subset_predict]
#        predict_x = pd.DataFrame(predict_x)
#        predict_x.index = adata.obs[subset_predict].index
#
#    # Train predictive model using user defined partition labels (train_x ,train_label, predict_x)
#    model = lr.fit(train_x, train_label)
#    lr.fit(train_x, train_label)
#    predict = lr.predict_proba(predict_x)
#
#    # Create prediction table and map to adata.obs (in adata.obs["predict"] in the combined object), for the cells that
#    # are in predict dataset
#    predict = lr.predict(predict_x)
#    predict = pd.DataFrame(predict)
#    predict.index = adata.obs[subset_predict].index
#    adata.obs[col_name] = adata.obs.index
#    adata.obs[col_name] = adata.obs[col_name].map(predict[0])

# Function to plot heatmap by percentage
def plot_df_heatmap(df, cmap='viridis', title=None, figsize=(7, 7), rotation=90, save=None, **kwargs):
    """Draw `df` as an image heatmap, labelled with its column and index values.

    df       -- table to draw; columns label the x ticks, the index labels the y ticks
    cmap     -- colour map passed to imshow
    title    -- optional figure title
    figsize  -- figure size passed to plt.subplots
    rotation -- rotation of the x tick labels; labels are right-aligned for rotations
                strictly between 0 and 90 and centred otherwise
    save     -- optional file name; when given the figure is written with a tight bounding box
    kwargs   -- forwarded to imshow (e.g. vmin / vmax)
    """
    fig, ax = plt.subplots(figsize=figsize)
    ax.imshow(df, cmap=cmap, aspect='auto', **kwargs)

    tick_alignment = 'right' if 0 < rotation < 90 else 'center'
    column_positions = range(len(df.columns))
    row_positions = range(len(df.index))
    plt.xticks(column_positions, df.columns, rotation=rotation, horizontalalignment=tick_alignment)
    plt.yticks(row_positions, df.index)

    if title:
        fig.suptitle(title)
    if save:
        plt.savefig(fname=save, bbox_inches='tight', pad_inches=0.1)

# Plot probability table by html
def cross_table(adata, x, y, normalise=None, highlight=False, subset=None):
    """Make a cross table comparing two categorical annotations.

    adata     -- object whose `.obs` table holds the columns `x` and `y`
    x, y      -- names of the .obs columns; `x` gives the rows, `y` the columns
    normalise -- 'x' divides every row by the number of cells carrying that x label,
                 'y' divides every column by the number of cells carrying that y label;
                 the result is rounded to 2 decimals. Any other value returns raw counts.
    highlight -- if True, return a pandas Styler with a viridis background gradient
    subset    -- optional selector applied to both columns before tabulating

    Returns the cross table as a DataFrame (or a Styler when `highlight` is True).
    """
    x_attr = adata.obs[x]
    y_attr = adata.obs[y]
    if subset is not None:
        x_attr = x_attr[subset]
        y_attr = y_attr[subset]
    crs_tbl = pd.crosstab(x_attr, y_attr)
    # Group sizes are looked up by label rather than divided positionally: a categorical
    # column can carry unused categories that are counted (as 0) but absent from the
    # cross table, which would make a positional array misaligned or the wrong length.
    if normalise == 'x':
        x_counts = x_attr.value_counts()
        x_sizes = pd.Series([x_counts.loc[label] for label in crs_tbl.index], index=crs_tbl.index)
        crs_tbl = crs_tbl.div(x_sizes, axis=0).round(2)
    elif normalise == 'y':
        y_counts = y_attr.value_counts()
        y_sizes = pd.Series([y_counts.loc[label] for label in crs_tbl.columns], index=crs_tbl.columns)
        crs_tbl = crs_tbl.div(y_sizes, axis=1).round(2)
    if highlight:
        return crs_tbl.style.background_gradient(cmap='viridis', axis=0)
    return crs_tbl

In [None]:
# The dataset names act as separators inside the combined label column: a label belongs
# to a dataset when it contains that dataset's name (partial match), which also allows
# asymmetric comparisons.
Data1_group = data1
Data2_group = data2
# Common .obs column of the concatenated data
common_cat = "corr_concat"

# Labels of each dataset, then boolean masks over the cells (training = dataset 1,
# prediction = dataset 2) and the training labels themselves
tagged_labels = adata.obs[common_cat]
group1 = list(tagged_labels[tagged_labels.str.contains(Data1_group)].unique())
group2 = list(tagged_labels[tagged_labels.str.contains(Data2_group)].unique())
subset_predict = np.array(tagged_labels.isin(group2))
subset_train = np.array(tagged_labels.isin(group1))
train_label = tagged_labels[tagged_labels.isin(group1)].values

In [None]:
# Assign 'lr' as the sklearn logistic regression model, with the penalty, C (sparcity)
# and max_iter defined in the configuration cell
if penalty == "l1":
    # one-vs-rest with liblinear; the dual formulation is only available for the l2 penalty,
    # so the l1 model has to be solved in its primal form (dual=False)
    lr = LogisticRegression(penalty=penalty, C=sparcity, max_iter=max_iter, dual=False, solver='liblinear', multi_class='ovr')
elif penalty == "elasticnet":
    lr = LogisticRegression(penalty=penalty, C=sparcity, max_iter=max_iter, dual=False, solver='saga', l1_ratio=l1_ratio, multi_class='multinomial')
else:
    lr = LogisticRegression(penalty=penalty, C=sparcity, max_iter=max_iter)

# Labels of the training cells and original labels of the cells to predict
all_labels = adata.obs[common_cat].values
predict_label = all_labels[subset_predict]
train_label = all_labels[subset_train]
predict_index = adata.obs.index[subset_predict]

# Build the training / prediction matrices. The configuration value `train_x` is left
# untouched (the matrix goes into `train_matrix`) so that this cell can be re-run.
if train_x == 'X':
    # Row selection uses the boolean masks directly, so the rows always line up with the labels
    train_matrix = adata.X[subset_train]
    predict_x = adata.X[subset_predict]
elif train_x in adata.obsm.keys():
    embedding = adata.obsm[train_x]
    train_matrix = embedding[subset_train, :]
    predict_x = pd.DataFrame(embedding[subset_predict])
    predict_x.index = predict_index
else:
    raise ValueError("train_x must be 'X' or a key of adata.obsm, got: " + repr(train_x))

# Train predictive model using user defined partition labels (train_matrix, train_label)
lr.fit(train_matrix, train_label)
predict_proba = lr.predict_proba(predict_x)

# Create prediction table and map it to adata.obs["predicted"] in the combined object;
# cells that are not part of the prediction dataset get no value
predict = pd.DataFrame(lr.predict(predict_x))
predict.index = predict_index
col_name = 'predicted'
adata.obs[col_name] = adata.obs.index
adata.obs[col_name] = adata.obs[col_name].map(predict[0])

In [None]:
import pickle

In [None]:
# Persist the fitted model in the output directory of this run.
# NOTE: only unpickle this file from a trusted location - loading a pickle can execute arbitrary code.
filename = os.path.join(save_path, 'YS_training_against_YS_citeseq_model_20220401.sav')
# The context manager closes the file handle even if dumping fails
with open(filename, 'wb') as model_file:
    pickle.dump(lr, model_file)

In [None]:
# Alias of the fitted classifier; the prediction table below is built through `model`
model = lr

In [None]:
# Save the combined object (now carrying the 'predicted' column) as .h5ad.
# NOTE(review): the path is relative, so the file lands in the current working directory
# rather than in save_path - confirm this is intended.
adata.write('Combined_YS_main_CiteSeq_RNA_object_mito_removed_probability_comparison_20220401.h5ad')

In [None]:
# Per-cell prediction table for the projected dataset: predicted label, original label
# and one probability column per class known to the model
train_label = adata.obs[common_cat].values
predict_label = train_label[subset_predict]

predicted_cells = adata.obs.index[adata.obs[common_cat].isin(group2)]
pred_out = pd.DataFrame(model.predict(predict_x), columns=['predicted'], index=predicted_cells)
pred_out['orig_labels'] = predict_label
proba = pd.DataFrame(model.predict_proba(predict_x), columns=lr.classes_, index=predicted_cells)
pred_out = pred_out.join(proba)

In [None]:
# Write the per-cell prediction table into the output directory, tagged with the run date
run_date = '20220401'
pred_out_csv = save_path + '/pred_out_' + run_date + '.csv'
pred_out.to_csv(pred_out_csv)

In [None]:
# Mean predicted probability of every training class, per original label of the projected cells
# (swap .mean() for .median() to summarise by the median instead)
probability_columns = pred_out.loc[:, pred_out.columns != 'predicted']
model_mean_probs = probability_columns.groupby('orig_labels').mean()
# Drop rows that contain NaN. Only `how` is passed: pandas >= 2.0 raises a TypeError
# when `how` and `thresh` are both supplied, even if thresh is None.
model_mean_probs = model_mean_probs.dropna(axis=0, how='any')
crs_tbl = model_mean_probs.copy()
# Sort df columns by rows
crs_tbl = crs_tbl.sort_values(by=list(crs_tbl.index), axis=1, ascending=False)

# Heatmap of the mean probabilities: rows are original labels, columns are training labels
pal = sns.diverging_palette(240, 10, n=10)
plt.figure(figsize=(20,15))

sns.set(font_scale=0.8)
g = sns.heatmap(crs_tbl, cmap=pal,  annot=True,vmin=0, vmax=1, linewidths=1, center=0.5, square=True, cbar_kws={"shrink": 0.5})
plt.ylabel("Original labels")
plt.xlabel("Training labels")

In [None]:
# Display the sorted mean-probability table
crs_tbl

In [None]:
# List the column names of crs_tbl; the hard-coded column order used further down must match these names
list(crs_tbl.columns)

In [None]:
# Work on a copy so that the re-ordering below leaves crs_tbl untouched
crs_tbl2 = crs_tbl.copy()

In [None]:
# Put the training-label columns into a fixed order for plotting.
# Every name listed here must exist as a column of crs_tbl2.
training_label_order = [
    'ProgenitorsYS_main_object_training',
    'LymphoidYS_main_object_training',
    'DCYS_main_object_training',
    'MonocyteYS_main_object_training',
    'MacrophageYS_main_object_training',
    'MicrogliaYS_main_object_training',
    'Granulocyte_precursorsYS_main_object_training',
    'Mast_cellYS_main_object_training',
    'MKYS_main_object_training',
    'ErythroidYS_main_object_training',
    'EndotheliumYS_main_object_training',
    'FibroblastYS_main_object_training',
    'Smooth_MuscleYS_main_object_training',
    'MesotheliumYS_main_object_training',
    'EndodermYS_main_object_training',
]
crs_tbl2 = crs_tbl2[training_label_order]
crs_tbl2

In [None]:
# Transpose so that the next selection can re-order the other axis by column name.
# NOTE: re-running this cell on its own flips the table back.
crs_tbl2 = crs_tbl2.T

In [None]:
# Show the column names of the transposed table
crs_tbl2.columns

In [None]:
# Put the columns of the transposed table into a fixed order, then transpose again.
# Every name listed here must exist as a column of crs_tbl2.
projected_label_order = [
    'ProgenitorsYS_CiteSeq_RNA_mito_removed',
    'LymphoidYS_CiteSeq_RNA_mito_removed',
    'pDC precursorYS_CiteSeq_RNA_mito_removed',
    'MonocyteYS_CiteSeq_RNA_mito_removed',
    'MacrophageYS_CiteSeq_RNA_mito_removed',
    'MicrogliaYS_CiteSeq_RNA_mito_removed',
    'Mast_cellYS_CiteSeq_RNA_mito_removed',
    'MKYS_CiteSeq_RNA_mito_removed',
    'ErythroidYS_CiteSeq_RNA_mito_removed',
    'EndotheliumYS_CiteSeq_RNA_mito_removed',
    'FibroblastYS_CiteSeq_RNA_mito_removed',
    'Smooth_MuscleYS_CiteSeq_RNA_mito_removed',
    'MesotheliumYS_CiteSeq_RNA_mito_removed',
    'EndodermYS_CiteSeq_RNA_mito_removed',
]
crs_tbl2 = crs_tbl2[projected_label_order].T

In [None]:
# Display the re-ordered table
crs_tbl2

In [None]:
# Final orientation for plotting.
# NOTE: re-running this cell on its own flips the orientation again.
crs_tbl2 = crs_tbl2.T # so that YS is on the left (rows) and cite-seq on the bottom (columns)

In [None]:
# Heatmap of the re-ordered mean-probability table
pal = sns.diverging_palette(240, 10, n=10)
plt.figure(figsize=(20,15))

sns.set(font_scale=0.8)
g = sns.heatmap(crs_tbl2, cmap=pal,  annot=False,vmin=0, vmax=1, linewidths=1, center=0.5, square=True, cbar_kws={"shrink": 0.5})
# After the transposes above, crs_tbl2 holds the training labels as rows (y axis)
# and the original labels of the projected data as columns (x axis)
plt.ylabel("Training labels")
plt.xlabel("Original labels")

In [None]:
# Same heatmap of the re-ordered table, drawn again so that it can be written to disk
pal = sns.diverging_palette(240, 10, n=10)
plt.figure(figsize=(20,15))

sns.set(font_scale=0.8)
g = sns.heatmap(crs_tbl2, cmap=pal,  annot=False,vmin=0, vmax=1, linewidths=1, center=0.5, square=True, cbar_kws={"shrink": 0.5})
# After the transposes above, crs_tbl2 holds the training labels as rows (y axis)
# and the original labels of the projected data as columns (x axis)
plt.ylabel("Training labels")
plt.xlabel("Original labels")
# The exported table is crs_tbl (all classes, original orientation), not the re-ordered crs_tbl2
crs_tbl.to_csv(save_path + "/crs_tbl_20220401.csv")
plt.savefig(save_path + "/marker_comparison_heatmap_20220401.pdf")