# Import libraries and setup

In [None]:
# Import libraries we may need
import scanpy as sc
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
import seaborn as sb
import scanpy.external as sce
import wget
import yaml
import wget
import astir
import dill
import umap
import fa2
reducer = umap.UMAP()
import squidpy as sq
import anndata as ad
from scipy.sparse import block_diag

#My module for importing IMC data into AnnData format
import mikeimc as mimc

In [None]:
# Set up output figure settings
plt.rcParams['figure.figsize']=(64,64) # default figure size (width, height); increase here to rescale figures

# Set up scanpy settings
sc.settings.verbosity = 3
# NOTE(review): set_figure_params applies scanpy's own matplotlib rcParams by default
# (scanpy=True) - confirm that the figure.figsize set above survives this call; if it
# does not, pass figsize=... here instead of setting rcParams beforehand.
sc.set_figure_params(dpi=100, dpi_save=300) #Increase DPI for better resolution figures
#sc.logging.print_versions()

In [None]:
# Load the session: restore the notebook state stored in 'DC_SQUIDPY2.db'
# (the file written by the dump_session cell)
dill.load_session('DC_SQUIDPY2.db')

In [None]:
# Save the session: serialise the current notebook state to 'DC_SQUIDPY2.db'
# (the same file the load_session cell reads, so this overwrites the saved state)
dill.dump_session('DC_SQUIDPY2.db')

# Squidpy - neighbourhood analysis

In [None]:
# Read the phenotype colour table and index it by cluster name.
col_df = pd.read_csv('mikeimc_approach/colours/pheno_colours.csv')
_colours_by_cluster = col_df.set_index('pheno_cluster')
# to_dict() nests as {column: {pheno_cluster: value}}, so the per-cluster colours
# live under colour_palette['colour'].
colour_palette = _colours_by_cluster.to_dict()
colour_palette['colour']

In [None]:
# .copy() makes a completely separate copy for the neighbourhood analysis,
# so later changes to adata_subset3 do not touch adata_subset2
adata_subset3 = adata_subset2.copy()

In [None]:
adata_subset3.obs['Type']

In [None]:
# NL subset: keep every cell whose 'Type' is neither 'Early death' nor 'Late death'.
_covid_death_mask = adata_subset3.obs['Type'].isin(['Early death', 'Late death'])
adata_NL = adata_subset3[~_covid_death_mask, :].copy()

In [None]:
# Drop the virus-infected phenotypes and the 'Mesenchymal' cluster from the NL subset
# (a fresh copy replaces adata_NL).
_nl_excluded_clusters = [
    'IL1IL6 Virus-infected AT2 cell',
    'Virus-Infected Monocyte',
    'Virus-infected AT2 cell',
    'Virus-infected Alveolar Macrophage',
    'Virus-infected CD56High NK cell',
    'Virus-infected CD56LowNK cell',
    'Virus-infected Epithelial cell',
    'Virus-infected Interstitial Macrophage',
    'Virus-infected Neutrophil',
    'Mesenchymal',
]
_nl_excluded_mask = adata_NL.obs['pheno_cluster'].isin(_nl_excluded_clusters)
adata_NL = adata_NL[~_nl_excluded_mask, :].copy()

In [None]:
# COVID subset: keep every cell whose 'Type' is not 'NL'.
_is_nl = adata_subset3.obs['Type'].isin(['NL'])
adata_covid = adata_subset3[~_is_nl, :].copy()

In [None]:
# Early-death subset: remove the 'NL' and 'Late death' groups, keep the rest.
_not_early_death = adata_subset3.obs['Type'].isin(['NL', 'Late death'])
adata_early_death = adata_subset3[~_not_early_death, :].copy()

In [None]:
# Late-death subset: remove the 'NL' and 'Early death' groups, keep the rest.
_not_late_death = adata_subset3.obs['Type'].isin(['NL', 'Early death'])
adata_late_death = adata_subset3[~_not_late_death, :].copy()

In [None]:
# Save the full working copy to disk (note: the path carries no '.h5ad' suffix)
adata_subset3.write('./h5ad_files/adata_subset3')

In [None]:
# Persist each group subset under ./h5ad_files/, one file per object.
for _subset, _path in (
    (adata_NL, './h5ad_files/adata_NL'),
    (adata_covid, './h5ad_files/adata_covid'),
    (adata_early_death, './h5ad_files/adata_early_death'),
    (adata_late_death, './h5ad_files/adata_late_death'),
):
    _subset.write(_path)

# Subset anndata to run Squidpy analysis in batch

In [None]:
adata_NL

In [None]:
adata_NL.obs['ROI']

In [None]:
# ROI labels of the NL group; their position fixes the numbering adata_NL1..adata_NL30.
# NOTE(review): cells from ROIs that are not listed here are never split out - confirm
# the list against adata_NL.obs['ROI'].
_NL_ROIS = (
    'NL114_ROI1', 'NL114_ROI2', 'NL114_ROI3', 'NL114_ROI4', 'NL114_ROI5',
    'NL114_ROI6', 'NL114_ROI7', 'NL114_ROI8', 'NL114_ROI9',
    'NL1915A_ROI1', 'NL1915A_ROI2', 'NL1915A_ROI3', 'NL1915A_ROI4', 'NL1915A_ROI5',
    'NL1915A_ROI6', 'NL1915A_ROI7', 'NL1915A_ROI8', 'NL1915A_ROI9',
    'NL1933A_ROI1', 'NL1933A_ROI2', 'NL1933A_ROI4', 'NL1933A_ROI6',
    'NL6699_ROI1', 'NL6699_ROI2', 'NL6699_ROI3', 'NL6699_ROI4',
    'NL6699_ROI5', 'NL6699_ROI6', 'NL6699_ROI7', 'NL6699_ROI8',
)
# One independent AnnData copy per ROI, bound to the numbered notebook names
# (adata_NL1, adata_NL2, ...) that the adata_list cell collects.
globals().update({
    f'adata_NL{_n}': adata_NL[adata_NL.obs['ROI'].isin([_roi])].copy()
    for _n, _roi in enumerate(_NL_ROIS, start=1)
})

In [None]:
# All per-ROI NL objects, in ROI order; this order is reused for the merge below.
adata_list = (
    adata_NL1, adata_NL2, adata_NL3, adata_NL4, adata_NL5, adata_NL6,
    adata_NL7, adata_NL8, adata_NL9, adata_NL10, adata_NL11, adata_NL12,
    adata_NL13, adata_NL14, adata_NL15, adata_NL16, adata_NL17, adata_NL18,
    adata_NL19, adata_NL20, adata_NL21, adata_NL22, adata_NL23, adata_NL24,
    adata_NL25, adata_NL26, adata_NL27, adata_NL28, adata_NL29, adata_NL30,
)

In [None]:
# Build the spatial neighbour graph separately inside each ROI object, so that
# no edge links cells from different ROIs.
for roi_adata in adata_list:
    sq.gr.spatial_neighbors(roi_adata, n_neighs=4, coord_type='generic')

In [None]:
# Stack the per-ROI objects, in adata_list order, into a single AnnData;
# uns_merge='same' keeps only the .uns entries that are identical in every input
adata_NL_merge = ad.concat(adata_list, uns_merge='same')

In [None]:
# Attach the pairwise graphs to the merged object: one block per ROI on the diagonal,
# taken in the same adata_list order that was used for the merge, so the blocks line
# up with the merged observations and no edges cross ROIs.
for _graph_key in ('spatial_connectivities', 'spatial_distances'):
    _roi_graphs = [roi_adata.obsp[_graph_key] for roi_adata in adata_list]
    adata_NL_merge.obsp[_graph_key] = block_diag(_roi_graphs).tocsr()

In [None]:
# Change the 'pheno_cluster' column of the merged NL object to categorical dtype
adata_NL_merge.obs['pheno_cluster']=adata_NL_merge.obs['pheno_cluster'].astype('category')

In [None]:
adata_NL_merge

In [None]:
adata_NL_merge.obs['ROI']

In [None]:
adata_early_death

In [None]:
adata_early_death.obs['ROI']

In [None]:
# ROI labels of the early-death group; their position fixes the numbering
# adata_early_death1..adata_early_death43.
# NOTE(review): cells from ROIs that are not listed here are never split out - confirm
# the list against adata_early_death.obs['ROI'].
_EARLY_DEATH_ROIS = (
    'C1_ROI1', 'C1_ROI2', 'C1_ROI3', 'C1_ROI4',
    'C1_ROI5', 'C1_ROI6', 'C1_ROI7', 'C1_ROI8',
    'C3_ROI1', 'C3_ROI2', 'C3_ROI3', 'C3_ROI4', 'C3_ROI5', 'C3_ROI6', 'C3_ROI7',
    'C3_ROI8', 'C3_ROI9', 'C3_ROI10', 'C3_ROI11', 'C3_ROI12', 'C3_ROI13',
    'C10_ROI1', 'C10_ROI2', 'C10_ROI3', 'C10_ROI4',
    'C10_ROI5', 'C10_ROI6', 'C10_ROI7', 'C10_ROI8',
    'C12_ROI1', 'C12_ROI2', 'C12_ROI3', 'C12_ROI4', 'C12_ROI5', 'C12_ROI6', 'C12_ROI7',
    'C12_ROI8', 'C12_ROI9', 'C12_ROI10', 'C12_ROI11', 'C12_ROI12', 'C12_ROI13', 'C12_ROI14',
)
# One independent AnnData copy per ROI, bound to the numbered notebook names
# (adata_early_death1, adata_early_death2, ...) that the adata_list2 cell collects.
globals().update({
    f'adata_early_death{_n}': adata_early_death[adata_early_death.obs['ROI'].isin([_roi])].copy()
    for _n, _roi in enumerate(_EARLY_DEATH_ROIS, start=1)
})

In [None]:
# All per-ROI early-death objects, in ROI order; this order is reused for the merge below.
adata_list2 = (
    adata_early_death1, adata_early_death2, adata_early_death3, adata_early_death4,
    adata_early_death5, adata_early_death6, adata_early_death7, adata_early_death8,
    adata_early_death9, adata_early_death10, adata_early_death11, adata_early_death12,
    adata_early_death13, adata_early_death14, adata_early_death15, adata_early_death16,
    adata_early_death17, adata_early_death18, adata_early_death19, adata_early_death20,
    adata_early_death21, adata_early_death22, adata_early_death23, adata_early_death24,
    adata_early_death25, adata_early_death26, adata_early_death27, adata_early_death28,
    adata_early_death29, adata_early_death30, adata_early_death31, adata_early_death32,
    adata_early_death33, adata_early_death34, adata_early_death35, adata_early_death36,
    adata_early_death37, adata_early_death38, adata_early_death39, adata_early_death40,
    adata_early_death41, adata_early_death42, adata_early_death43,
)

In [None]:
# Build the spatial neighbour graph separately inside each early-death ROI object,
# so that no edge links cells from different ROIs.
for roi_adata in adata_list2:
    sq.gr.spatial_neighbors(roi_adata, n_neighs=4, coord_type='generic')

In [None]:
# Stack the per-ROI objects, in adata_list2 order, into a single AnnData;
# uns_merge='same' keeps only the .uns entries that are identical in every input
adata_early_death_merge = ad.concat(adata_list2, uns_merge='same')

In [None]:
# Attach the pairwise graphs to the merged object: one block per ROI on the diagonal,
# taken in the same adata_list2 order that was used for the merge, so the blocks line
# up with the merged observations and no edges cross ROIs.
for _graph_key in ('spatial_connectivities', 'spatial_distances'):
    _roi_graphs = [roi_adata.obsp[_graph_key] for roi_adata in adata_list2]
    adata_early_death_merge.obsp[_graph_key] = block_diag(_roi_graphs).tocsr()

In [None]:
# Change 'pheno_cluster' to categorical dtype; this column is passed as cluster_key
# to the enrichment / interaction-matrix calls on adata_early_death_merge
adata_early_death_merge.obs['pheno_cluster']=adata_early_death_merge.obs['pheno_cluster'].astype('category')

In [None]:
adata_early_death_merge

In [None]:
adata_late_death

In [None]:
adata_late_death.obs['ROI']

In [None]:
# ROI labels of the late-death group; their position fixes the numbering
# adata_late_death1..adata_late_death35.
# NOTE(review): cells from ROIs that are not listed here are never split out - confirm
# the list against adata_late_death.obs['ROI'].
_LATE_DEATH_ROIS = (
    'C11_ROI2', 'C11_ROI3', 'C11_ROI4', 'C11_ROI5',
    'C11_ROI6', 'C11_ROI7', 'C11_ROI8', 'C11_ROI11',
    'C21_ROI1', 'C21_ROI2', 'C21_ROI3', 'C21_ROI4', 'C21_ROI5', 'C21_ROI7',
    'C21_ROI8', 'C21_ROI9', 'C21_ROI10', 'C21_ROI11', 'C21_ROI12', 'C21_ROI13',
    'C21_ROI14', 'C21_ROI15', 'C21_ROI16', 'C21_ROI17',
    'C24_ROI1', 'C24_ROI2', 'C24_ROI3', 'C24_ROI4', 'C24_ROI5', 'C24_ROI6',
    'C24_ROI7', 'C24_ROI8', 'C24_ROI9', 'C24_ROI10', 'C24_ROI11',
)
# One independent AnnData copy per ROI, bound to the numbered notebook names
# (adata_late_death1, adata_late_death2, ...) that the adata_list3 cell collects.
globals().update({
    f'adata_late_death{_n}': adata_late_death[adata_late_death.obs['ROI'].isin([_roi])].copy()
    for _n, _roi in enumerate(_LATE_DEATH_ROIS, start=1)
})

In [None]:
# All per-ROI late-death objects, in ROI order; this order is reused for the merge below.
adata_list3 = (
    adata_late_death1, adata_late_death2, adata_late_death3, adata_late_death4, adata_late_death5,
    adata_late_death6, adata_late_death7, adata_late_death8, adata_late_death9, adata_late_death10,
    adata_late_death11, adata_late_death12, adata_late_death13, adata_late_death14, adata_late_death15,
    adata_late_death16, adata_late_death17, adata_late_death18, adata_late_death19, adata_late_death20,
    adata_late_death21, adata_late_death22, adata_late_death23, adata_late_death24, adata_late_death25,
    adata_late_death26, adata_late_death27, adata_late_death28, adata_late_death29, adata_late_death30,
    adata_late_death31, adata_late_death32, adata_late_death33, adata_late_death34, adata_late_death35,
)

In [None]:
# Build the spatial neighbour graph separately inside each late-death ROI object,
# so that no edge links cells from different ROIs.
for roi_adata in adata_list3:
    sq.gr.spatial_neighbors(roi_adata, n_neighs=4, coord_type='generic')

In [None]:
# Stack the per-ROI objects, in adata_list3 order, into a single AnnData;
# uns_merge='same' keeps only the .uns entries that are identical in every input
adata_late_death_merge = ad.concat(adata_list3, uns_merge='same')

In [None]:
# Attach the pairwise graphs to the merged object: one block per ROI on the diagonal,
# taken in the same adata_list3 order that was used for the merge, so the blocks line
# up with the merged observations and no edges cross ROIs.
for _graph_key in ('spatial_connectivities', 'spatial_distances'):
    _roi_graphs = [roi_adata.obsp[_graph_key] for roi_adata in adata_list3]
    adata_late_death_merge.obsp[_graph_key] = block_diag(_roi_graphs).tocsr()

In [None]:
# Change 'pheno_cluster' to categorical dtype; this column is passed as cluster_key
# to the Ripley / centrality / co-occurrence calls on adata_late_death_merge
adata_late_death_merge.obs['pheno_cluster']=adata_late_death_merge.obs['pheno_cluster'].astype('category')

In [None]:
adata_late_death_merge

# Run Squidpy in batch

In [None]:
# nhood_enrich is imported here rather than with the other imports at the top
# NOTE(review): presumably a project-local helper module - confirm where it lives.
import nhood_enrich as ne
# Run the helper on the merged early-death object, passing 'pheno_cluster' and 'ROI' as its two keys.
# NOTE(review): presumably this computes the enrichment per ROI, averages it (average_over_rois=True)
# and stores it under .uns['pheno_cluster_nhood_enrichment'], which the cells below read - confirm
# against nhood_enrich; the meaning of misc_table is not visible here.
ne.nhood_enrichment_hyperion(adata_early_death_merge,'pheno_cluster','ROI',average_over_rois=True, misc_table=True)

In [None]:
# Heatmap of the enrichment z-scores for the merged early-death object,
# colour scale clipped to [-2, 2], saved as PDF.
sq.pl.nhood_enrichment(
    adata_early_death_merge,
    cluster_key="pheno_cluster",
    mode='zscore',
    palette=colour_palette['colour'],
    cmap='bwr',
    vmin=-2,
    vmax=2,
    save='NE_ED_zscore_pheno_cluster_average.pdf',
)



In [None]:
adata_early_death_merge.uns['pheno_cluster_nhood_enrichment']

In [None]:
# Tabulate the neighbourhood-enrichment z-scores as a cluster x cluster table.
# pd.crosstab builds a contingency table from factor vectors and cannot take the
# .uns result dict directly, so the matrix is wrapped in a labelled DataFrame instead.
# NOTE(review): assumes the 'zscore' matrix is ordered like the categories of
# obs['pheno_cluster'] (squidpy's convention) - confirm for nhood_enrichment_hyperion.
_ed_clusters = adata_early_death_merge.obs['pheno_cluster'].cat.categories
tmp = pd.DataFrame(
    adata_early_death_merge.uns['pheno_cluster_nhood_enrichment']['zscore'],
    index=_ed_clusters,
    columns=_ed_clusters,
)

In [None]:
tmp

# Spatial neighbors and neighborhood enrichment

In [None]:
#sq.gr.spatial_neighbors(adata_late_death, coord_type = 'generic', n_neighs=4)
#n_neighs=4, radius=2
#delaunay

In [None]:
# Neighbourhood enrichment on the un-merged early-death object (fixed seed; result stored in place).
# NOTE(review): the only spatial graphs built above are attached to the per-ROI copies and to
# adata_early_death_merge; confirm adata_early_death carries a spatial graph (e.g. from the
# restored session) before running this cell.
sq.gr.nhood_enrichment(adata_early_death, cluster_key="pheno_cluster", seed=1234, copy=False)

In [None]:
# Z-score heatmap for the un-merged early-death object with method='ward',
# colour scale clipped to [-4, 4].
sq.pl.nhood_enrichment(
    adata_early_death,
    cluster_key="pheno_cluster",
    mode='zscore',
    method='ward',
    cmap='bwr',
    vmin=-4,
    vmax=4,
    save='NE_ED_zscore_pheno_cluster_ward.png',
)

In [None]:
# Same plot in 'count' mode for the merged early-death object, capped at 4000.
sq.pl.nhood_enrichment(
    adata_early_death_merge,
    cluster_key="pheno_cluster",
    mode='count',
    cmap='bwr',
    vmax=4000,
    save='NE_ED_count_pheno_cluster.png',
)

# Interaction Matrix

In [None]:
# Compute the number of shared edges in the neighbour graph between clusters
# for the merged early-death object; normalized=False requests no normalisation.
sq.gr.interaction_matrix(adata_early_death_merge, cluster_key="pheno_cluster", normalized=False)

In [None]:
# Heatmap of the interaction matrix computed above, capped at 4000, saved as PDF.
sq.pl.interaction_matrix(
    adata_early_death_merge,
    cluster_key="pheno_cluster",
    cmap='inferno',
    vmax=4000,
    save='ED_interaction_matrix_pheno_cluster.pdf',
)

# Ripley’s statistics

In [None]:
# Ripley statistic to compute; the same variable is reused by the plotting cell,
# so the computation and the plot cannot drift apart.
mode = "L"
sq.gr.ripley(
    adata_late_death_merge,
    cluster_key="pheno_cluster",
    mode=mode,
    max_dist=500,
    seed=1234,
    copy=False,
)

In [None]:
# Plot the Ripley statistic selected by `mode` for the merged late-death object.
sq.pl.ripley(
    adata_late_death_merge,
    cluster_key="pheno_cluster",
    mode=mode,
    save='LD_ripley_pheno_cluster.png',
)


# Centrality Scores

In [None]:
# Per-cluster centrality scores on the merged late-death graph, stored in place.
sq.gr.centrality_scores(adata_late_death_merge, cluster_key="pheno_cluster", copy=False)

In [None]:
# Plot the centrality scores computed above and save them as PNG.
sq.pl.centrality_scores(
    adata_late_death_merge,
    cluster_key="pheno_cluster",
    figsize=(20, 10),
    s=500,
    save='LD_centrality_scores_pheno_cluster.png',
)


# Co-occurrence across spatial dimensions

In [None]:
# Compute the co-occurrence score between clusters from the original spatial
# coordinates (obsm key 'spatial') with n_steps=50; the result is stored in place
# and visualised for a chosen conditional cluster in the next cell.
sq.gr.co_occurrence(
    adata_late_death_merge,
    cluster_key='pheno_cluster',
    spatial_key='spatial',
    n_steps=50,
    copy=False,
)

In [None]:
#from matplotlib.colors import ListedColormap

# Co-occurrence curves conditioned on the 'Virus-infected Neutrophil' cluster.
_co_occurrence_clusters = ['Virus-infected Neutrophil']
sq.pl.co_occurrence(
    adata_late_death_merge,
    cluster_key='pheno_cluster',
    clusters=_co_occurrence_clusters,
    figsize=(20, 10),
    save='LD_ve_neutro_co_ocurrence.png',
)

#palette=colour_palette['colour']

# Spatially variable proteins with spatial autocorrelation statistics

In [None]:
# Two spatial autocorrelation statistics are available: Moran's I and Geary's C.
# They score the degree of spatial variability of each variable's expression; the
# statistic and its p-value are computed per variable, with FDR correction.
# With the default copy=False the call returns None and writes the table to
# .uns["moranI"], so `df` is bound to that table rather than to the return value.
# NOTE(review): no spatial graph is built on adata_covid in the visible cells -
# confirm it carries one before running this.
sq.gr.spatial_autocorr(adata_covid, mode="moran")
df = adata_covid.uns["moranI"]
df.head(50)

# Voronoi plots

In [None]:
from voronoi_imc import draw_voronoi_scatter

# Draw and save one Voronoi plot per ROI.
# The ROI list and the per-ROI rows must come from the same object: the loop used
# to take ROI names from adata_covid while subsetting adata_late_death, which
# yields an empty table for every ROI that is not a late-death ROI.
# To plot another group, change this single line.
voronoi_obs = adata_late_death.obs

for i in voronoi_obs['ROI'].unique().tolist():

    # Rows (cells) belonging to the current ROI.
    spot = voronoi_obs[voronoi_obs['ROI'] == i]

    _ = draw_voronoi_scatter(spot=spot,
                             c=[],
                             voronoi_palette='pheno_cluster',
                             X='X_loc',
                             Y='Y_loc',
                             voronoi_hue='pheno_cluster')
    # One PNG per ROI, named after the ROI label, written to the working directory.
    plt.savefig(str(i)+'.png')

In [None]:
# Table behind the stacked bars: pheno_cluster (rows) x Type (columns).
# Choose `normalize` according to what is wanted:
#   - number of cells:                       leave normalize out
#   - distribution of each case per cluster: normalize='index'
#   - cluster composition per case:          normalize='columns'  (used here)
tmp = pd.crosstab(
    index=adata_subset3.obs['pheno_cluster'],
    columns=adata_subset3.obs['Type'],
    margins=False,
    margins_name='Total',
    normalize='columns',
)

In [None]:
tmp

In [None]:
# Display order of the clusters (rows of tmp) for the stacked bars.
_cluster_order = [
    'Virus-infected Epithelial cell',
    'Virus-infected AT2 cell',
    'IL1IL6 Virus-infected AT2 cell',
    'Virus-infected Alveolar Macrophage',
    'Virus-infected Interstitial Macrophage',
    'Virus-Infected Monocyte',
    'Virus-infected Neutrophil',
    'Virus-infected CD56LowNK cell',
    'Virus-infected CD56High NK cell',
    'AT2 cell',
    'Epithelial cell',
    'EC',
    'Activated EC',
    'Alveolar Macrophage',
    'Interstitial Macrophage',
    'Monocyte',
    'Neutrophil',
    'Mast cell',
    'Dendritic cell',
    'NK cell',
    'CD4 T cell',
    'CD8 T cell',
    'Proliferative CD8 T cell',
    'B cell',
    'Fibroblast',
    'SM cell',
    'Mesenchymal',
]
# reindex keeps this order; labels absent from tmp become all-NaN rows.
tmp2 = tmp.reindex(_cluster_order)

In [None]:
# Flip to Type (rows) x pheno_cluster (columns) so each bar is one Type.
tmp3 = tmp2.T

In [None]:
# Colours for the disease groups, matching the other bar graphs
# (defined here but not used by the plot below):
#   #F08080 - light coral
#   #87CEFA - light sky blue
#   #D3D3D3 - light grey
pretty_colors = ['#F08080', '#87CEFA', '#D3D3D3']

# 28-colour 'Spectral' palette used for the stacked segments.
color_pal = sb.color_palette('Spectral', 28)

# One stacked bar per row of tmp3, legend anchored at the top-right corner of the axes.
_bar_ax = tmp3.plot.bar(stacked=True, color=color_pal, figsize=(4, 4))
_bar_ax.legend(bbox_to_anchor=(1, 1))