In [None]:
import os
import sys
# Record the interpreter and the working directory in the notebook output so
# the run environment can be reconstructed later.
print("Python version: " + sys.version)
# A bare expression is only displayed when it is the last line of a cell, so
# the working directory has to be printed explicitly.
print("Working directory: " + os.getcwd())
print(sys.executable)

In [None]:
import numpy as np
np.random.seed(123)
import pandas as pd
import scipy
import itertools

import umap
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import scanpy as sc
import anndata as ad
import scvelo as scv
from tqdm.notebook import tqdm

from pathlib import Path

In [None]:
from IPython.display import clear_output

In [None]:
from muon import prot as pt
from joblib import dump, load

In [None]:
import scrublet as scr

In [None]:
# scanpy logging level, followed by scanpy's header output.
sc.settings.verbosity = 1
sc.logging.print_header()
# Figure defaults for the scanpy plots below.
sc.settings.set_figure_params(facecolor='white', dpi=80)

In [None]:
# scvelo leaves an axes grid on the figures; switch it off for all later plots
plt.rcParams.update({'axes.grid': False})

In [None]:
import itertools

In [None]:
from vcf_functions import *

In [None]:
from scitcem_utils import *

In [None]:
# Folder holding the cell-type signature collection (revised from Stefan's cell
# type signatures). It is passed to score_cell_cycle() and
# score_smillie_str_epi_imm() further down.
signatures_path_ = '../cell_type_from_stefan/scrnaseq_signature_collection/'
from score_and_classify import *

### variables

In [None]:
# Root folder of the input data (expects CellBender/<sample>/... below it).
# The cluster path is only the default: set the CRC_DATA_FOLDER environment
# variable to run the notebook on another machine.
data_folder = os.environ.get('CRC_DATA_FOLDER', '/fast/users/twei_m/work/crc/datasets')

In [None]:
# Output folder for everything this notebook writes. The cluster path is only
# the default: set the CRC_NEW_DATA_FOLDER environment variable to override it.
new_data_folder = os.environ.get('CRC_NEW_DATA_FOLDER',
                                 '/fast/users/twei_m/work/crc/datasets_new_preprocessing')

In [None]:
# Tumour samples: patient id (first four characters) followed by 't' and an
# optional replicate number.
tsamples = ['p007t', 'p008t', 'p009t1','p009t2', 'p013t', 'p014t', 'p016t', 
           'p020t', 'p021t', 'p026t', 'p035t'] 

In [None]:
# All samples: the normal ('n') samples followed by the tumour samples above.
samples = ['p007n', 'p008n', 'p009n1', 'p009n2','p013n', 'p014n', 'p016n', 
           'p020n', 'p021n'] + tsamples

In [None]:
# Patients labelled 'MSI' in obs['MS_status'] by the loaders below; every other
# patient is labelled 'MSS'. NOTE(review): 'p050' has no sample in `samples`.
MSI_list = ['p026', 'p035', 'p050']

In [None]:
# Samples loaded with get_adata_and_protein(), i.e. the ones that get hashtag
# columns; all other samples go through get_adata().
demux_sample = ['p020n', 'p021n', 'p020t', 'p021t']

### add protein info to demux_sample and add to adata list

In [None]:
def get_adata(datafolder, sample, scrublet_threshold=None):
    """Load one CellBender-filtered sample, annotate it and score doublets.

    Parameters
    ----------
    datafolder : pathlib.Path
        Folder containing ``CellBender/<sample>/cellbender_matrix_filtered.h5``.
    sample : str
        Sample name. The first four characters are stored as the patient id;
        the fifth character decides the origin ('t' -> 'tumour', anything
        else -> 'normal').
    scrublet_threshold : float, optional
        Doublet-score cutoff handed to ``Scrublet.call_doublets``. With
        ``None`` the calls returned by ``scrub_doublets`` are kept.

    Returns
    -------
    AnnData
        The loaded matrix with a ``CB_counts`` layer, obs columns ``sample``,
        ``sample_origin``, ``patient``, ``MS_status``, ``doublet_score`` and
        ``predicted_doublet``, and empty ``target_hashtag`` /
        ``second_hashtag`` / ``high_prob_warm`` columns. obs names are
        rewritten to ``<sample>:<barcode without suffix>``.

    Notes
    -----
    Reads the module-level ``MSI_list`` and draws the Scrublet histogram and
    UMAP as a side effect.
    """
    # gex_only=False keeps every feature type stored in the file.
    # NOTE(review): Scrublet below therefore sees all of those features, not
    # only gene expression - confirm this is intended.
    gene = sc.read_10x_h5(datafolder/f'CellBender/{sample}/cellbender_matrix_filtered.h5', gex_only=False)

    # keep the CellBender counts available under their own layer name
    gene.layers["CB_counts"] = gene.X.copy()

    patient = sample[:4]
    gene.obs['sample'] = sample
    gene.obs['sample_origin'] = 'tumour' if sample[4:5] == 't' else 'normal'
    gene.obs['patient'] = patient
    gene.obs['MS_status'] = 'MSI' if patient in MSI_list else 'MSS'

    # sample + cell id (the part of the barcode before the first '-')
    gene.obs_names = [sample + ':' + x.split('-')[0] for x in gene.obs_names]
    gene.var_names_make_unique()

    # empty columns so the obs layout matches the hashtag samples on concat
    gene.obs['target_hashtag'] = None
    gene.obs['second_hashtag'] = None
    gene.obs['high_prob_warm'] = None

    # scrublet score
    scrub = scr.Scrublet(gene.X)
    doublet_scores, predicted_doublets = scrub.scrub_doublets(get_doublet_neighbor_parents=False)

    # `is not None` so that an explicit threshold of 0 is honoured as well
    if scrublet_threshold is not None:
        predicted_doublets = scrub.call_doublets(threshold=scrublet_threshold)

    # both results are in the same cell order as gene.obs
    gene.obs['doublet_score'] = doublet_scores
    gene.obs['predicted_doublet'] = predicted_doublets

    # QC plots: score histogram and a UMAP of the Scrublet manifold
    scrub.plot_histogram()
    scrub.set_embedding('UMAP', scr.get_umap(scrub.manifold_obs_, 10, min_dist=0.3))
    scrub.plot_embedding('UMAP', order_points=True)

    return gene

In [None]:
def get_adata_and_protein(datafolder, sample, target_hashtag, second_hashtag, scrublet_threshold=None):
    """Load one hashtagged sample, attach two CLR-normalised hashtags and score doublets.

    Parameters
    ----------
    datafolder : pathlib.Path
        Folder containing ``CellBender/<sample>/cellbender_matrix_filtered.h5``.
    sample : str
        Sample name. The first four characters are stored as the patient id;
        the fifth character decides the origin ('t' -> 'tumour', anything
        else -> 'normal').
    target_hashtag, second_hashtag : int
        Column positions within the "Antibody Capture" features.
    scrublet_threshold : float, optional
        Doublet-score cutoff handed to ``Scrublet.call_doublets``. With
        ``None`` the calls returned by ``scrub_doublets`` are kept.

    Returns
    -------
    AnnData
        The loaded matrix with a ``CB_counts`` layer and obs columns
        ``sample``, ``sample_origin``, ``patient``, ``MS_status``,
        ``target_hashtag``, ``second_hashtag``, ``high_prob_warm``,
        ``doublet_score`` and ``predicted_doublet``. obs names are rewritten
        to ``<sample>:<barcode without suffix>``.

    Notes
    -----
    Reads the module-level ``MSI_list`` and draws the Scrublet histogram and
    UMAP as a side effect.
    """
    # gex_only=False keeps every feature type stored in the file.
    # NOTE(review): Scrublet below therefore also sees the antibody features -
    # confirm this is intended.
    gene = sc.read_10x_h5(datafolder/f'CellBender/{sample}/cellbender_matrix_filtered.h5', gex_only=False)

    # keep the CellBender counts available under their own layer name
    gene.layers["CB_counts"] = gene.X.copy()

    patient = sample[:4]
    gene.obs['sample'] = sample
    gene.obs['sample_origin'] = 'tumour' if sample[4:5] == 't' else 'normal'
    gene.obs['patient'] = patient
    gene.obs['MS_status'] = 'MSI' if patient in MSI_list else 'MSS'

    # protein part: an independent copy, so the CLR below leaves `gene` untouched
    protein = gene[:, gene.var["feature_types"] == "Antibody Capture"].copy()

    # normalise protein
    pt.pp.clr(protein)

    # add the two hashtags to obs, aligned on the (still original) cell barcodes
    for column, feature_idx in (('target_hashtag', target_hashtag),
                                ('second_hashtag', second_hashtag)):
        clr_values = protein.X[:, feature_idx].toarray().ravel()
        gene.obs[column] = pd.Series(clr_values, index=protein.obs_names).reindex(gene.obs_names)

    gene.obs['high_prob_warm'] = (gene.obs['target_hashtag'] > 1) & (gene.obs['second_hashtag'] < 0.1)

    # sample + cell id; must happen after the hashtags were aligned on the barcodes
    gene.obs_names = [sample + ':' + x.split('-')[0] for x in gene.obs_names]
    gene.var_names_make_unique()

    # scrublet score
    scrub = scr.Scrublet(gene.X)
    doublet_scores, predicted_doublets = scrub.scrub_doublets(get_doublet_neighbor_parents=False)

    # `is not None` so that an explicit threshold of 0 is honoured as well
    if scrublet_threshold is not None:
        predicted_doublets = scrub.call_doublets(threshold=scrublet_threshold)

    # both results are in the same cell order as gene.obs
    gene.obs['doublet_score'] = doublet_scores
    gene.obs['predicted_doublet'] = predicted_doublets

    # QC plots: score histogram and a UMAP of the Scrublet manifold
    scrub.plot_histogram()
    scrub.set_embedding('UMAP', scr.get_umap(scrub.manifold_obs_, 10, min_dist=0.3))
    scrub.plot_embedding('UMAP', order_points=True)

    return gene

In [None]:
adatas = {}

In [None]:
hashtag_list = [[1,3], [0,2], [10,11], [8,9]] # hashtag number -1 

In [None]:
# scrublet threshold after inspecting the hist
demux_threshold_list = 0.3

In [None]:
for n in np.arange(0,4):
    gene = get_adata_and_protein(Path(data_folder), demux_sample[n],
                                 hashtag_list[n][0], hashtag_list[n][1], demux_threshold_list)
    adatas[demux_sample[n]] = gene

### add all the rest h5s to the adata list and concat

In [None]:
# Every sample that is not loaded through the hashtag path, in `samples` order.
# Built in one pass: removing items from a list while iterating over it skips
# the element after each removal, which is why the removal had to run twice.
rest_sample = [name for name in samples if name not in demux_sample]

In [None]:
len(rest_sample)

In [None]:
rest_sample_threshold = 0.3

In [None]:
rest_sample

In [None]:
for sample in rest_sample:
    gene = get_adata(Path(data_folder), sample, rest_sample_threshold)
    adatas[sample] = gene

### preprocessing

In [None]:
adata_all = sc.concat(adatas)

In [None]:
adata_all.obs['sample'].value_counts().sort_index()

In [None]:
adata_all.obs[['sample','predicted_doublet']].value_counts().sort_index()
## p007n at 80% doublet rate is weird, Automatically set threshold at doublet score = 0.05

#### before filtering

In [None]:
adata_all.var['mt'] = adata_all.var_names.str.startswith('MT-')
sc.pp.calculate_qc_metrics(adata_all, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

In [None]:
sc.pl.violin(adata_all, ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
             jitter=0.4, multi_panel=True)

#### filtering and remove doublet

In [None]:
adata_all_copy = adata_all.copy()

In [None]:
adata_all_copy.shape

In [None]:
#### since scrublet is weird in p007n & all of the sample has less than 1%, we dont remove the doublets
#### fix the issue by manually assigned thresholds
adata_all = adata_all[adata_all.obs['predicted_doublet'] == False].copy()

In [None]:
adata_all.shape

In [None]:
sc.pp.filter_cells(adata_all, min_counts=1000)  # uhlitz
#sc.pp.filter_cells(adata_all, max_counts=50000)  # uhlitz
sc.pp.filter_cells(adata_all, min_genes=500)  # uhlitz
#sc.pp.filter_cells(adata_all, max_genes=5000)  # uhlitz

In [None]:
sc.pp.calculate_qc_metrics(adata_all, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

In [None]:
sc.pl.violin(adata_all, ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
             jitter=0.4, multi_panel=True)

In [None]:
adata_all = adata_all[adata_all.obs.pct_counts_mt < 80, :]

In [None]:
# sanity
sc.pp.filter_genes(adata_all, min_cells=1)

In [None]:
sc.pl.violin(adata_all, ['n_genes_by_counts', 'total_counts', 'pct_counts_mt'],
             jitter=0.4, multi_panel=True)

In [None]:
adata_all.obs[['sample']].value_counts().sort_index()

In [None]:
# Per-cell normalisation followed by log1p; everything below works on the
# log-normalised matrix.
# NOTE(review): normalize_per_cell is a legacy scanpy function - check whether
# the installed version still supports it before upgrading.
sc.pp.normalize_per_cell(adata_all)
sc.pp.log1p(adata_all)

In [None]:
# Cell-cycle scoring (project helper, uses the signature folder), then 2000
# highly variable genes selected with 'sample' as batch key.
score_cell_cycle(adata_all, signatures_path_)
sc.pp.highly_variable_genes(adata_all, n_top_genes=2000, batch_key='sample')

In [None]:
sc.pl.highly_variable_genes(adata_all)

In [None]:
# Project helpers; presumably they add ribosomal / haemoglobin percentages to
# adata_all - TODO confirm against their definitions.
get_ribo_percentage(adata_all)
get_hemo_percentage(adata_all)

In [None]:
sc.tl.pca(adata_all, svd_solver='arpack', n_comps = 50, use_highly_variable=True)
sc.pl.pca_variance_ratio(adata_all, log=False)

In [None]:
sc.pp.neighbors(adata_all, n_neighbors=50, n_pcs=20)
sc.tl.umap(adata_all)
sc.tl.diffmap(adata_all)
sc.tl.louvain(adata_all, key_added='louvain', resolution=1)
sc.tl.louvain(adata_all, key_added='louvain_highres', resolution=2)

In [None]:
sc.tl.leiden(adata_all, key_added='leiden')
sc.tl.leiden(adata_all, key_added='leiden_highres', resolution=2)

In [None]:
scv.pl.scatter(adata_all, basis='umap', color=['sample_origin', 'sample', 
                                               'patient', 'MS_status', 
                                               'target_hashtag', 'second_hashtag', 'high_prob_warm'], 
               ncols=2, dpi=300, legend_loc='right margin', size = 1)

In [None]:
score_smillie_str_epi_imm(adata_all, signatures_path_)

In [None]:
adata_all.obs['celltype_1a'] = np.array(['epi', 'str', 'imm'])[np.argmax(adata_all.obs[['epi_score', 'str_score', 'imm_score']].values, axis=1)]
adata_all.obs['celltype_1a_score'] = np.max(adata_all.obs[['epi_score', 'str_score', 'imm_score']].values, axis=1)


In [None]:
adata_all #73294

In [None]:
scv.pl.scatter(adata_all, basis='umap', color=['celltype_1a', 'epi_score', 'str_score', 'imm_score'], 
               ncols=2, dpi=300, legend_loc='right margin', size = 1)

In [None]:
adata_all.obs['celltype_1a'].value_counts()

In [None]:
adata_all.obs[['sample','celltype_1a']].value_counts().sort_index()

In [None]:
adata_all.obs['sample_origin'].value_counts(dropna=False)

adata_all.write(Path(new_data_folder)/'202305_CB_all_cells.h5')

### write cell id as txt

In [None]:
celltype = ['epi', 'str', 'imm']

In [None]:
adata_copy = adata_all.copy()

In [None]:
adata_copy.obs_names = [x.split(':')[1] + '-1' for x in adata_copy.obs_names]

#### write cell type anno txt
for sample in samples:
    for ctype in celltype:
        (adata_copy[(adata_copy.obs['sample'] == sample) & (adata_copy.obs['celltype_1a'] == ctype)].
         obs_names.to_frame(name = 'cell_id').to_csv(Path(new_data_folder)/f'anno/{sample}_{ctype}.txt', 
                                                     index = False,
                                                     header = False))

In [None]:
del adata_copy

adata_all = sc.read(Path(new_data_folder)/'202305_CB_all_cells.h5')

adata_all.uns['log1p']['base'] = None

In [None]:
adata_epi = adata_all[adata_all.obs['celltype_1a'] == 'epi'].copy()

In [None]:
adata_epi.shape #39780 #39168

In [None]:
adata_epi.obs['sample'].value_counts().sort_index()

In [None]:
adata_epi.obs['sample_origin'].value_counts(dropna=False)

### assign HVGs

#### separate HVGs
adata_list = samples
intersected = samples

for i in np.arange(0,len(samples)):
    adata_list[i] = adata_epi[adata_epi.obs['sample'] == samples[i]]
    sc.pp.highly_variable_genes(adata_list[i], n_top_genes=2000) 


for i in np.arange(0,len(samples)):
    intersected[i] = adata_list[i].copy()
    intersected[i] = intersected[i][:, intersected[i].var.highly_variable]
    intersected[i] = intersected[i].var.index.values
    

merged_HVG = list(itertools.chain.from_iterable(intersected))

plt.hist(pd.DataFrame(merged_HVG).value_counts());

(pd.DataFrame(merged_HVG).value_counts()>=5).value_counts()

len(np.unique(merged_HVG))

merged_HVG_list = pd.DataFrame({'highly_variable': pd.DataFrame(merged_HVG).value_counts()>=5}).reset_index()
merged_HVG_list = merged_HVG_list[merged_HVG_list['highly_variable'] == True][0].tolist()

len(merged_HVG_list) 2025

#### if HVG directly
direct_HVGs = sc.pp.highly_variable_genes(adata_epi, n_top_genes=2000, batch_key='sample', inplace=False) 

np.unique(direct_HVGs[direct_HVGs['highly_variable']==True].index.isin(merged_HVG_list), return_counts=True)

In [None]:
sc.pp.highly_variable_genes(adata_epi, n_top_genes=2000, batch_key='sample') 

### preprocessing

In [None]:
# default take HVG
sc.tl.pca(adata_epi, svd_solver='arpack', n_comps = 50, use_highly_variable=True)
sc.pl.pca_variance_ratio(adata_epi, log=False)

In [None]:
sc.pp.neighbors(adata_epi, n_neighbors=20, n_pcs=15)
sc.tl.umap(adata_epi)
#sc.tl.diffmap(adata_epi)
sc.tl.louvain(adata_epi, key_added='louvain', resolution=1)
sc.tl.leiden(adata_epi, key_added='leiden')

In [None]:
scv.pl.scatter(adata_epi, basis='umap', color=['sample_origin', 'sample', 
                                               'patient', 'MS_status', 
                                               'leiden', 'louvain'], 
               ncols=2, dpi=300, legend_loc='right margin', size = 2)

adata_epi.write(Path(new_data_folder)/'202305_CB_epi_cells.h5')

### add pc10 nn20

In [None]:
adata_epi_20_10 = sc.pp.neighbors(adata_epi, n_neighbors= 20, n_pcs = 10, copy = True)
sc.tl.umap(adata_epi_20_10)
sc.tl.louvain(adata_epi_20_10, key_added='louvain', resolution=1)
sc.tl.leiden(adata_epi_20_10, key_added='leiden')

In [None]:
adata_epi.obsm['X_umap_pc10_nn20'] = adata_epi_20_10.obsm['X_umap']

adata_epi.write(Path(new_data_folder)/'202306_CB_epi_cells_umap.h5')

### Compare different PC

adata_epi = sc.read(Path(new_data_folder)/'202306_CB_epi_Numbat_Scitcem_inferCNV.h5')

In [None]:
adata_epi_copy = adata_epi.copy()

In [None]:
adata_epi_copy

#### nn 20 pc 7 

In [None]:
adata_epi_20_7 = sc.pp.neighbors(adata_epi, n_neighbors=20, n_pcs=7, copy = True)

In [None]:
sc.tl.umap(adata_epi_20_7)
sc.tl.louvain(adata_epi_20_7, key_added='louvain', resolution=1)
sc.tl.leiden(adata_epi_20_7, key_added='leiden')

In [None]:
adata_epi_copy.obsm['X_umap_20_7'] = adata_epi_20_7.obsm['X_umap']

In [None]:
scv.pl.scatter(adata_epi_copy, basis='umap_20_7', color=['sample_origin', 'sample',
                                                         'patient', 'MS_status',
                                                         'leiden', 'louvain',
                                                         'numbat', 'scitcem_call', 'inferCNV_result'], 
               ncols=2, dpi=300, legend_loc='right margin', size = 2)

#### nn 15 pc 7

In [None]:
adata_epi_15_7 = sc.pp.neighbors(adata_epi, n_neighbors= 15, n_pcs=7, copy = True)

In [None]:
sc.tl.umap(adata_epi_15_7)
sc.tl.louvain(adata_epi_15_7, key_added='louvain', resolution=1)
sc.tl.leiden(adata_epi_15_7, key_added='leiden')

In [None]:
adata_epi_copy.obsm['X_umap_15_7'] = adata_epi_15_7.obsm['X_umap']

In [None]:
scv.pl.scatter(adata_epi_copy, basis='umap_15_7', color=['sample_origin', 'sample', 
                                               'patient', 'MS_status', 
                                               'leiden', 'louvain', 
                                                        'numbat', 'scitcem_call', 'inferCNV_result'], 
               ncols=2, dpi=300, legend_loc='right margin', size = 2)

#### nn 50 pc 7

In [None]:
adata_epi_50_7 = sc.pp.neighbors(adata_epi, n_neighbors= 50, n_pcs=7, copy = True)
sc.tl.umap(adata_epi_50_7)
sc.tl.louvain(adata_epi_50_7, key_added='louvain', resolution=1)
sc.tl.leiden(adata_epi_50_7, key_added='leiden')

In [None]:
adata_epi_copy.obsm['X_umap_50_7'] = adata_epi_50_7.obsm['X_umap']

In [None]:
scv.pl.scatter(adata_epi_copy, basis='umap_50_7', color=['sample_origin', 'sample', 
                                               'patient', 'MS_status', 
                                               'leiden', 'louvain', 
                                                        'numbat', 'scitcem_call', 'inferCNV_result'], 
               ncols=2, dpi=300, legend_loc='right margin', size = 2)

#### nn 50 pc 10

In [None]:
adata_epi_50_10 = sc.pp.neighbors(adata_epi, n_neighbors= 50, n_pcs = 10, copy = True)
sc.tl.umap(adata_epi_50_10)
sc.tl.louvain(adata_epi_50_10, key_added='louvain', resolution=1)
sc.tl.leiden(adata_epi_50_10, key_added='leiden')

In [None]:
adata_epi_copy.obsm['X_umap_50_10'] = adata_epi_50_10.obsm['X_umap']

In [None]:
scv.pl.scatter(adata_epi_copy, basis='umap_50_10', color=['sample_origin', 'sample', 
                                               'patient', 'MS_status', 
                                               'leiden', 'louvain', 
                                                        'numbat', 'scitcem_call', 'inferCNV_result'], 
               ncols=2, dpi=300, legend_loc='right margin', size = 2)

#### nn 20 pc 10

In [None]:
adata_epi_20_10 = sc.pp.neighbors(adata_epi, n_neighbors= 20, n_pcs = 10, copy = True)
sc.tl.umap(adata_epi_20_10)
sc.tl.louvain(adata_epi_20_10, key_added='louvain', resolution=1)
sc.tl.leiden(adata_epi_20_10, key_added='leiden')

In [None]:
adata_epi_copy.obsm['X_umap_20_10'] = adata_epi_20_10.obsm['X_umap']

In [None]:
scv.pl.scatter(adata_epi_copy, basis='umap_20_10', color=['sample_origin', 'sample', 
                                               'patient', 'MS_status', 
                                               'leiden', 'louvain', 
                                                        'numbat', 'scitcem_call', 'inferCNV_result'], 
               ncols=2, dpi=300, legend_loc='right margin', size = 2)

#### nn 15 pc 10

In [None]:
adata_epi_15_10 = sc.pp.neighbors(adata_epi, n_neighbors= 15, n_pcs = 10, copy = True)
sc.tl.umap(adata_epi_15_10)
sc.tl.louvain(adata_epi_15_10, key_added='louvain', resolution=1)
sc.tl.leiden(adata_epi_15_10, key_added='leiden')

In [None]:
adata_epi_copy.obsm['X_umap_15_10'] = adata_epi_15_10.obsm['X_umap']

In [None]:
scv.pl.scatter(adata_epi_copy, basis='umap_15_10', color=['sample_origin', 'sample', 
                                               'patient', 'MS_status', 
                                               'leiden', 'louvain', 
                                                        'numbat', 'scitcem_call', 'inferCNV_result'], 
               ncols=2, dpi=300, legend_loc='right margin', size = 2)

#### nn 50 pc 15

In [None]:
adata_epi_50_15 = sc.pp.neighbors(adata_epi, n_neighbors= 50, n_pcs = 15, copy = True)
sc.tl.umap(adata_epi_50_15)
sc.tl.louvain(adata_epi_50_15, key_added='louvain', resolution=1)
sc.tl.leiden(adata_epi_50_15, key_added='leiden')

In [None]:
adata_epi_copy.obsm['X_umap_50_15'] = adata_epi_50_15.obsm['X_umap']

In [None]:
scv.pl.scatter(adata_epi_copy, basis='umap_50_15', color=['sample_origin', 'sample', 
                                               'patient', 'MS_status', 
                                               'leiden', 'louvain', 
                                                        'numbat', 'scitcem_call', 'inferCNV_result'], 
               ncols=2, dpi=300, legend_loc='right margin', size = 2)

#### nn 15 pc 15

In [None]:
adata_epi_15_15 = sc.pp.neighbors(adata_epi, n_neighbors= 15, n_pcs = 15, copy = True)
sc.tl.umap(adata_epi_15_15)
sc.tl.louvain(adata_epi_15_15, key_added='louvain', resolution=1)
sc.tl.leiden(adata_epi_15_15, key_added='leiden')

In [None]:
adata_epi_copy.obsm['X_umap_15_15'] = adata_epi_15_15.obsm['X_umap']

In [None]:
scv.pl.scatter(adata_epi_copy, basis='umap_15_15', color=['sample_origin', 'sample', 
                                               'patient', 'MS_status', 
                                               'leiden', 'louvain', 
                                                        'numbat', 'scitcem_call', 'inferCNV_result'], 
               ncols=2, dpi=300, legend_loc='right margin', size = 2)

#### nn 15 pc 20

In [None]:
adata_epi_15_20 = sc.pp.neighbors(adata_epi, n_neighbors= 15, n_pcs = 20, copy = True)
sc.tl.umap(adata_epi_15_20)
sc.tl.louvain(adata_epi_15_20, key_added='louvain', resolution=1)
sc.tl.leiden(adata_epi_15_20, key_added='leiden')

In [None]:
adata_epi_copy.obsm['X_umap_15_20'] = adata_epi_15_20.obsm['X_umap']

In [None]:
scv.pl.scatter(adata_epi_copy, basis='umap_15_20', color=['sample_origin', 'sample', 
                                               'patient', 'MS_status', 
                                               'leiden', 'louvain', 
                                                        'numbat', 'scitcem_call', 'inferCNV_result'], 
               ncols=2, dpi=300, legend_loc='right margin', size = 2)

#### nn 20 pc 20

In [None]:
adata_epi_20_20 = sc.pp.neighbors(adata_epi, n_neighbors= 20, n_pcs = 20, copy = True)
sc.tl.umap(adata_epi_20_20)
sc.tl.louvain(adata_epi_20_20, key_added='louvain', resolution=1)
sc.tl.leiden(adata_epi_20_20, key_added='leiden')

In [None]:
adata_epi_copy.obsm['X_umap_20_20'] = adata_epi_20_20.obsm['X_umap']

In [None]:
scv.pl.scatter(adata_epi_copy, basis='umap_20_20', color=['sample_origin', 'sample', 
                                               'patient', 'MS_status', 
                                               'leiden', 'louvain', 
                                                        'numbat', 'scitcem_call', 'inferCNV_result'], 
               ncols=2, dpi=300, legend_loc='right margin', size = 2)

#### nn 50 pc 20

In [None]:
adata_epi_50_20 = sc.pp.neighbors(adata_epi, n_neighbors= 50, n_pcs = 20, copy = True)
sc.tl.umap(adata_epi_50_20)
sc.tl.louvain(adata_epi_50_20, key_added='louvain', resolution=1)
sc.tl.leiden(adata_epi_50_20, key_added='leiden')

In [None]:
adata_epi_copy.obsm['X_umap_50_20'] = adata_epi_50_20.obsm['X_umap']

In [None]:
scv.pl.scatter(adata_epi_copy, basis='umap_50_20', color=['sample_origin', 'sample', 
                                               'patient', 'MS_status', 
                                               'leiden', 'louvain', 
                                                        'numbat', 'scitcem_call', 'inferCNV_result'], 
               ncols=2, dpi=300, legend_loc='right margin', size = 2)