# CNA on the Reyes et al. sepsis dataset

In [None]:
import sys
sys.path.append("/data/srlab/lrumker/MCSC_Project/cna-display/")

In [None]:
import scanpy as sc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as st
import cna
import matplotlib.gridspec as gridspec
import pp, pf
plt.style.use('../pp.mplstyle')
np.random.seed(0) # for reproducibility

In [None]:
import pickle

In [None]:
# Set scanpy's logging verbosity to the most detailed level and print the scanpy header
sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()

# Recreate the authors' data preprocessing steps

In [None]:
# Paths for saved AnnData files.
# `results_file` is written once the per-sample metadata has been built and is then
# re-read with `cna.read`. In the cells visible here, `results_pp_file` appears only in
# commented-out write/read calls and `results_cna_file` is not referenced again.
results_file = '/data/srlab/lrumker/MCSC_Project/sepsis_data/pbmc.h5ad'
results_pp_file = '/data/srlab/lrumker/MCSC_Project/sepsis_data/pbmc_pp.h5ad'
results_cna_file = '/data/srlab/lrumker/MCSC_Project/sepsis_data/pbmc_cna.h5ad'

In [None]:
# Load the AnnData object
adata = sc.read_h5ad('/data/srlab/lrumker/MCSC_Project/sepsis_data/scp_scanpy.h5ad') # data from SingleCellPortal

# Inspect the object: summary, unique cell-type labels, number of cells.
# In a notebook only the last expression of a cell is echoed, so the cell-type
# list is computed but not displayed unless it is printed explicitly.
print(adata)
np.unique(adata.obs['cell_type']).tolist()
adata.n_obs

The publication lists 126,351 cells total. The discrepancy is due to the publicly available data from SingleCellPortal omitting MK cells. 

In [None]:
## Same filters as original paper

# Filter out cells with fewer than 100 counts
# NOTE(review): this comment previously read "fewer than 100 genes", but the call
# below filters on `min_counts`, not `min_genes` — confirm which threshold the paper used
sc.pp.filter_cells(adata, min_counts=100) 

# Filter out genes with representation across fewer than 10 cells
sc.pp.filter_genes(adata, min_cells=10)

# Annotate the group of mitochondrial genes as 'mt' (gene names starting with 'MT-')
adata.var['mt'] = adata.var_names.str.startswith('MT-')  
# Compute QC metrics in place, including the 'mt' group flagged above
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

# Plot distribution of key QC metrics
sc.pl.violin(adata, ['total_counts', 'n_genes_by_counts', 'pct_counts_mt'],
             multi_panel=True, stripplot=False)

In [None]:
## Normalize
# Methods did not mention normalization, but we assume they did this
# Scale each cell to a total of 1e4, then apply log1p
sc.pp.normalize_total(adata, target_sum=1e4) 
sc.pp.log1p(adata)

## Preprocessing & PCA

In [None]:
# Identify highly variable genes
# Parameters from Reyes et al paper
sc.pp.highly_variable_genes(adata, min_mean=0.0125, min_disp=0.5)

# Subset to only highly variable genes.
# Slicing an AnnData returns a view; take an explicit copy so that the in-place
# scaling and PCA that follow operate on an independent object instead of
# relying on an implicit view-to-copy conversion.
adata = adata[:, adata.var.highly_variable].copy()
adata.n_vars

In [None]:
# Scale data with default parameters
sc.pp.scale(adata)

# Run PCA on the scaled data using the 'arpack' SVD solver
sc.tl.pca(adata, svd_solver='arpack')

## Visual embedding with tSNE
(will match tSNE in the original paper)

In [None]:
# Build the neighbor graph and both 2-D embeddings (UMAP and tSNE) on `adata`
np.random.seed(0) # for reproducibility

print('computing and storing nearest neighbor graph')
sc.pp.neighbors(adata) # default parameters

print('computing and storing umap')
sc.tl.umap(adata)

print('computing and storing tsne')
# tSNE uses 10 PCs and a fixed random_state so the layout is repeatable
sc.tl.tsne(adata, n_pcs = 10, random_state = 0)

In [None]:
# Plot tSNE colored by cell type and cell state to confirm that it matches the original paper
sc.pl.tsne(adata, color=['cell_type', 'cell_state'], legend_loc='on data', legend_fontsize = 'small')

# Visually examine the scope of potential batch effects by coloring the same
# embedding by phenotype and by patient. These were not corrected for in the
# initial paper, so we apply no additional pre-processing for batch correction.
sc.pl.tsne(adata, color=['pheno', 'patient'], legend_fontsize = 'x-small')

## Remove DC-enriched cells

`adata` includes the DC-enriched cells, whereas `d` does not (used for CNA)

In [None]:
## Remove DC-enriched cells: keep only cells whose 'sort' label is 'CD45'
# Slicing returns a view of `adata`; new .obs columns are assigned to `d` right
# after this, so take an explicit copy rather than relying on an implicit
# view-to-copy conversion at the first write.
d = adata[adata.obs['sort'] == 'CD45', :].copy()

# Cell counts per 'sort' label
d.obs.groupby(['sort']).size()

## Make samplem

In [None]:
## Batch number = the text after the first "-" in each cell's index label
## (kept in case we wish to use it later)
new = d.obs.index.str.split("-", n=1, expand=False)
batches = [parts[1] for parts in new]
d.obs['batch_num'] = batches

## This analysis treats every cell as belonging to one and the same batch
d.obs['batch'] = np.ones(d.obs.shape[0])

## The sample identifier is the patient
d.obs['id'] = d.obs['patient']

## Build the sampleXmeta dataframe: one row per sample id (first occurrence kept),
## with the cell-level index discarded and 'id' as the index so that it matches
## the 'id' column of d.obs
select = d.obs[['pheno', 'id', 'batch_num']]
sampleXmeta = (
    select.drop_duplicates(subset="id", keep='first')
    .reset_index(drop=True)
    .set_index('id')
)

## Attach an independent copy of sampleXmeta to d
d.samplem = sampleXmeta.copy()

In [None]:
# Preview the first two rows of the per-sample metadata
d.samplem.head(2)

In [None]:
# Preview the first two rows of the per-cell metadata
d.obs.head(2)

In [None]:
# save the pre-processed data
# Save the pre-processed data to `results_file`
# NOTE(review): `samplem` was attached as an attribute on `d`; confirm that `write`
# persists it, since this file is re-read with `cna.read` and `samplem` is used afterwards
d.write(results_file)

## CNA preprocessing

In [None]:
# Re-load the saved file through cna's reader, replacing the in-memory `d`
d = cna.read(results_file)

In [None]:
d.samplem

In [None]:
type(d)

In [None]:
# copy technical covariate and batch information from per-cell metadata to per-sample metadata
# NOTE(review): 'n_counts' is presumably the column added by the earlier
# sc.pp.filter_cells(min_counts=...) call — confirm it is present in d.obs
d.obs_to_sample(['batch', 'n_counts', 'n_genes_by_counts', 'pct_counts_mt'])

In [None]:
## The existing nearest neighbor graph was calculated with DC-enriched cells (needed to construct original UMAP)
## For CNA we compute a nearest neighbor graph without the DC-enriched cells
print('computing and storing nearest neighbor graph')
sc.pp.neighbors(d)

In [None]:
## Plot tSNE with DC enriched cells removed
# Note that the tSNE coordinates are unchanged from above
sc.pl.tsne(d, color=['cell_type', 'cell_state'], legend_loc='on data', legend_fontsize = 'small')

In [None]:
# Compute KNN graph
cna.pp.knn(d)

In [None]:
# save the pre-processed data
#d.write(results_pp_file)

# Apply CNA

In [None]:
#d = cna.read(results_pp_file)

## Run association test on main Phenotype 1: Any sepsis vs. Any non-sepsis

In [None]:
# some convenient shorthand
# some convenient shorthand
sm = d.samplem # sample meta-data
# per-sample batch column; this rebinds `batches`, which earlier held the
# per-cell list of batch-number strings
batches = sm.batch # batch info

In [None]:
d.samplem

In [None]:
## Define phenotype of interest as ANY sepsis

# Per-sample indicator: 1.0 when the sample's 'pheno' is one of the four labels
# below, 0.0 otherwise
any_sepsis = (
    d.samplem['pheno']
    .isin(['Bac-SEP', 'ICU-SEP', 'URO', 'Int-URO'])
    .to_numpy()
    .astype(float)
)

d.samplem['AnySepsis'] = any_sepsis

In [None]:
## Set phenotype and covariates
# Phenotype vector: the per-sample AnySepsis indicator as an array
y = sm.AnySepsis.values
# No covariates are passed to the association test
covs = None

In [None]:
# Run association test
# Run association test of the phenotype `y`, passing the per-sample batches,
# no covariates, and Nnull=100000
np.random.seed(0) # for reproducibility
res = cna.tl._association.association(d, y, batches=batches, covs=covs, Nnull=100000)
print()
# Report the global p-value and the number of PCs used
# (the trailing number is presumably the p-value seen on a previous run — TODO confirm)
print('p =', res.p, ',', res.k, 'PCs used') #6.99e-5

In [None]:
# Global association test p-value
print('Global association test p-value: ', res.p, ',', res.k, 'PCs used')

# Correlation threshold for 5% FDR
print('Correlation threshold for 5% FDR', res.fdr_5p_t)

# Correlation threshold for 10% FDR
print('Correlation threshold for 10% FDR', res.fdr_10p_t)

# Number of neighborhoods with local associations at 5% FDR
n = np.sum(abs(res.ncorrs) > res.fdr_5p_t)
print('Number of neighborhoods with local associations', n)

In [None]:
# Variance explained
res.r2

## Make some plots

In [None]:
## Save cell labels of whether they pass fdr threshold
## Store the per-cell CNA scores, then flag the cells lying beyond the 5% FDR
## correlation threshold on the positive side and on the negative side
d.obs['cna_cell_scores'] = res.ncorrs

d.obs['pos_assoc_fdr5'] = (d.obs['cna_cell_scores'] > res.fdr_5p_t).to_numpy()
d.obs['neg_assoc_fdr5'] = (d.obs['cna_cell_scores'] < -res.fdr_5p_t).to_numpy()

# Version 1: Interpret phenotype

In [None]:
def plot_clust_hist(cluster_sel, cell_scores, ax, d, fdr_thresh, sel_color):
    """Draw a histogram of the per-cell scores belonging to one cluster.

    Parameters
    ----------
    cluster_sel
        Cluster label. Cells are selected where ``d.obs['cell_state']`` equals
        ``str(cluster_sel)``; the label is also used as the panel title.
    cell_scores
        Array-like with one score per row of ``d.obs``, in the same row order.
        Any labelled index it carries is ignored; selection is positional.
    ax
        Axes to draw on.
    d
        Object exposing a per-cell ``obs`` table with a ``'cell_state'`` column.
    fdr_thresh
        Threshold marked with vertical lines at ``+fdr_thresh`` and
        ``-fdr_thresh``.
    sel_color
        Color of the histogram bars. The title keeps its default color.
    """
    # Boolean mask over cells, applied to a plain array so that a pandas Series
    # of scores with a non-integer index is handled the same as a numpy array
    in_cluster = np.asarray(d.obs['cell_state'] == str(cluster_sel))
    scores = np.asarray(cell_scores)[in_cluster]

    # Fixed bins and range so that panels for different clusters are comparable
    ax.hist(scores, color=sel_color, bins=50, range=(-0.5, 0.5))
    ax.axvline(fdr_thresh, label='FDR_5p', color='purple', lw=1)
    ax.axvline(-fdr_thresh, color='purple', lw=1)
    ax.set_xticks([-0.5, 0, 0.5])
    ax.set_yticks([])
    ax.set_title(cluster_sel)

### Examine within-cluster heterogeneity

In [None]:
fig, axs = plt.subplots(5, 3, figsize=(6, 6))

# Flatten the 5x3 grid row by row: panel k sits at row k // 3, column k % 3
axs_list = list(axs.ravel())

# One histogram per cell state, in the sorted order returned by np.unique.
# The unique labels are computed once, and zip() stops at whichever runs out
# first, so fewer than 15 cell states no longer raises an IndexError.
cell_states = np.unique(d.obs.cell_state)
for state, panel in zip(cell_states, axs_list):
    plot_clust_hist(state, res.ncorrs, panel, d, res.fdr_5p_t, "steelblue")

# Hide any panels that did not receive a cluster
for panel in axs_list[len(cell_states):]:
    panel.axis('off')

# Red stars placed by hand at x = -0.5 on selected panels: {panel index: y position}
# NOTE(review): presumably these mark clusters of interest and the y values were
# tuned to each panel's bar heights — confirm if the data change
star_heights = {0: 100, 1: 60, 3: 7, 4: 10, 8: 100, 9: 200, 12: 200, 13: 350}
for panel_idx, y_pos in star_heights.items():
    axs_list[panel_idx].plot(-.5, y_pos, '*', c="red")

plt.tight_layout()
plt.savefig('../_figs/suppfig.sepsis_heterogeneity.pdf')

### Affirm lack of cluster association for MS4 cluster

In [None]:
# Assign cell groups
d.obs['MS4_type'] = np.repeat("0", d.obs.shape[0])
d.obs['MS4_type'].iloc[np.where(d.obs['cell_state']=="MS4")[0]] = "1"

In [None]:
from methods import methods

In [None]:
# Number of cells per sample id, stored on the per-sample table as column "C"
# in the row order of d.samplem
cell_counts = d.obs['id'].value_counts()

# Direct lookup by sample id instead of scanning the whole count index once per
# sample; a sample id with no cells raises KeyError
counts_by_id = cell_counts.to_dict()
d.samplem["C"] = [counts_by_id[sample_id] for sample_id in d.samplem.index]

In [None]:
# NOTE: abundance of CNA population and phenotype in permutation test should show something

In [None]:
# To verify an association using MASC
# Arguments: the phenotype and batches cast to int, the per-sample cell counts "C",
# no sample-level or cell-level covariates, and 'MS4_type' as the cluster column
masc_res = methods._MASC(d, y.astype(int),
      batches.values.astype(int),
      d.samplem["C"].values,
      None, # sample-level covariates
      None, #d.obs[['n_counts', 'pct_counts_mt']].values, # Cell-level covariates
      clustertype='MS4_type')

In [None]:
# Odds Ratio
# exp() of a hard-coded coefficient; presumably copied by hand from the MASC
# result — TODO confirm, and prefer reading it from `masc_res`
np.exp(-0.00614)

### Store associated populations

In [None]:
FDR_thresh = res.fdr_5p_t

# Cell scores per neighborhood
d.obs['ncorrs'] = res.ncorrs

# Boolean flags are assigned as whole columns. Writing through
# `d.obs[col].loc[mask] = True` is chained assignment, which is not guaranteed
# to update d.obs (and never does under pandas Copy-on-Write).

# Positively-associated cells: score above the 5% FDR threshold
d.obs['poscells'] = (d.obs['ncorrs'] > FDR_thresh).to_numpy()

# Negatively-associated cells: score below the negated 5% FDR threshold
d.obs['negcells'] = (d.obs['ncorrs'] < -FDR_thresh).to_numpy()

In [None]:
# Store the per-cell scores on d.obs (repeats the assignment made when the
# associated populations were stored)
d.obs['ncorrs'] = res.ncorrs

In [None]:
# Load the gene set scores table
# (assumed to hold one row per cell, in the same order as `d` — TODO confirm)
geneset_scores = pd.read_csv("/data/srlab/lrumker/MCSC_Project/mcsc_scratch/sepsis_geneset_scores.csv")

In [None]:
# Absolute correlation between each gene set score and each of the first 10 NAM PCs.
# The first 10 NAM PC columns and the gene set score columns are placed side by
# side, transposed so that every variable is a row, and correlated all-vs-all.
# The [10:25, 0:10] slice keeps gene sets as rows and NAM PCs as columns.
# NOTE(review): rows 10:25 assume geneset_scores has exactly 15 columns — confirm
nampc_geneset_corrs = np.abs(np.corrcoef(np.concatenate((d.uns['NAM_nbhdXpc'].iloc[:,0:10], 
                            geneset_scores), axis = 1).T))[10:25,0:10]


In [None]:
# Short pathway labels: take the first 15 column names of geneset_scores and
# strip the first 4 and the last 8 characters from each
pathways = geneset_scores.columns
pathways = [pathways[i][4:-8] for i in range(15)]

In [None]:
# Wrap the correlation matrix in a DataFrame whose rows are labelled by pathway name
nampc_geneset_corrs = pd.DataFrame(nampc_geneset_corrs, index = pathways)

In [None]:
nampc_geneset_corrs

In [None]:
# Load the table of enriched pathways
enriched_pathways = pd.read_csv("/data/srlab/lrumker/MCSC_Project/mcsc_scratch/sepsis_enriched_pathways.csv")

In [None]:
# Short pathway label: drop the first 4 and the last 8 characters of each name.
# Vectorised string slicing works for any index; the label-based lookup over the
# hard-coded range 1..15 only worked when the index labels were exactly 1..15.
enriched_pathways["Pathway"] = enriched_pathways["pathway"].str[4:-8]

# Rounded enrichment score and adjusted p-value for display
enriched_pathways["Enrichment"] = np.around(enriched_pathways["ES"], 2)
enriched_pathways["P, Adjusted"] = np.around(enriched_pathways["padj"], 4)

# Absolute correlation of each gene set with the first two NAM PCs.
# Values are assigned by position, so this assumes the rows of enriched_pathways
# are in the same pathway order as nampc_geneset_corrs — TODO confirm
enriched_pathways["NAM PC0 Cor"] = np.around(nampc_geneset_corrs.iloc[:, 0].values, 2)
enriched_pathways["NAM PC1 Cor"] = np.around(nampc_geneset_corrs.iloc[:, 1].values, 2)

In [None]:
# Display the table without the raw columns; the result is not assigned, so
# enriched_pathways itself is unchanged
enriched_pathways.drop(["ES","pval","padj", "pathway"], axis = 1)

In [None]:
# One line per gene set: its correlation with NAM PCs 1-6 (x axis is 1-based)
for i in np.arange(nampc_geneset_corrs.shape[0]):
    plt.plot(np.arange(6)+1, nampc_geneset_corrs.iloc[i,0:6],
            label = nampc_geneset_corrs.index[i])
plt.legend()

In [None]:
# Same plot for three selected gene sets (rows 0, 3 and 10), across all NAM PC columns
for i in [0,3, 10]:
    plt.plot(np.arange(nampc_geneset_corrs.shape[1])+1, nampc_geneset_corrs.iloc[i,:],
            label = nampc_geneset_corrs.index[i])
plt.legend()

In [None]:
from matplotlib.cm import get_cmap

In [None]:
fig = plt.figure(figsize=(7, 3))
gs = fig.add_gridspec(nrows=7, ncols=12,
                      wspace=1, hspace=2.5)
ax1 = fig.add_subplot(gs[0:3, 0:3])
ax2 = fig.add_subplot(gs[0:3, 3:6])
ax3 = fig.add_subplot(gs[3:6, 0:3])
ax4 = fig.add_subplot(gs[3:6, 3:6])
ax5 = fig.add_subplot(gs[0:6, 6:12])

# Positional indices of the cells flagged as positively or negatively associated,
# and the line colors. Both are defined here so that this cell runs in
# top-to-bottom order instead of depending on a cell further down the notebook.
loc_assoc = np.concatenate((np.where(d.obs['poscells'])[0],
                            np.where(d.obs['negcells'])[0]))
colors = get_cmap("tab20_r").colors


def _scatter_assoc_cells(ax, values, title, r_value=None):
    """Plot the associated cells on the tSNE, colored by z-scored `values`.

    `values` holds one entry per cell. It is converted to a plain array so that
    positional selection with `loc_assoc` does not depend on a pandas index.
    The color scale is symmetric, clipped at the larger of the negated 10th and
    the 90th percentile of the z-scores. When `r_value` is given it is printed
    in the panel as "R = ...".
    """
    c = np.asarray(values, dtype=float)
    c = c - np.mean(c)
    c = c / np.std(c)
    cutoff = np.max([-np.percentile(c, 10), np.percentile(c, 90)])
    ax.scatter(d.obsm['X_tsne'][loc_assoc, 0], d.obsm['X_tsne'][loc_assoc, 1],
               alpha=0.5, c=c[loc_assoc], cmap='seismic',
               vmin=-cutoff, vmax=cutoff, **pp.umapprops)
    if r_value is not None:
        ax.text(0, 0.9, '$R = {:.2f}$'.format(r_value),
                transform=ax.transAxes, fontsize=6, color="black")
    ax.set_title(title)
    ax.axis('off')


# Left 2x2 block: each NAM PC next to one gene set score.
# NOTE(review): the R values are hard-coded; presumably taken from
# nampc_geneset_corrs — confirm if the inputs change.
_scatter_assoc_cells(ax1, d.uns['NAM_nbhdXpc'].iloc[:, 0], 'NAM PC0')
_scatter_assoc_cells(ax2, geneset_scores.PID_PDGFRB_PATHWAY.values,
                     'PDGFRB Pathway', r_value=0.52)
# NAM PC1 is plotted with its sign flipped
_scatter_assoc_cells(ax3, -d.uns['NAM_nbhdXpc'].iloc[:, 1], 'NAM PC1')
_scatter_assoc_cells(ax4, geneset_scores.PID_TOLL_ENDOGENOUS_PATHWAY.values,
                     'Toll Pathway', r_value=0.61)

# Right panel: correlation of every gene set with NAM PCs 1-6, drawn explicitly
# on ax5 rather than on whichever axes pyplot currently considers active
ax = ax5
for i in np.arange(nampc_geneset_corrs.shape[0]):
    ax.plot(np.arange(6) + 1, nampc_geneset_corrs.iloc[i, 0:6],
            label=nampc_geneset_corrs.index[i], color=colors[i])
ax.legend()
ax.set_xticks([1, 2, 3, 4, 5, 6])
ax.set_yticks([0, 0.7])
ax.set_xlabel("NAM PC")
ax.set_ylabel("Correlation to Gene Set")
ax.yaxis.set_label_coords(-0.02, 0.5)

plt.tight_layout()
plt.savefig('../_figs/rawsupfig.sepsis.pdf')

In [None]:
fig = plt.figure(figsize=(6, 4))
gs = fig.add_gridspec(nrows=9, ncols=12,
                      wspace=0.5, hspace=1)

# Top row: three panels, each 4 grid rows tall and 4 grid columns wide
ax1, ax2, ax3 = (fig.add_subplot(gs[0:4, left:left + 4]) for left in (0, 4, 8))
# Middle row: four panels, 2 grid rows tall and 3 grid columns wide
ax4, ax5, ax6, ax7 = (fig.add_subplot(gs[4:6, left:left + 3]) for left in (0, 3, 6, 9))
# Bottom row: four panels, 3 grid rows tall and 3 grid columns wide
ax8, ax9, ax10, ax11 = (fig.add_subplot(gs[6:9, left:left + 3]) for left in (0, 3, 6, 9))

# --- Top middle: every cell in grey, then the associated cells on top,
# --- colored by z-scored neighborhood correlation
ax = ax2
c = res.ncorrs
c = (c - np.mean(c))
c = c / np.std(c)
cutoff = np.max([-np.percentile(c, 10), np.percentile(c, 90)])
ax.scatter(d.obsm['X_tsne'][:, 0], d.obsm['X_tsne'][:, 1],
           alpha=0.5, c="grey", **pp.umapprops)
# positional indices of the positively, then negatively, associated cells
loc_assoc = np.concatenate((np.where(d.obs['poscells'])[0],
                            np.where(d.obs['negcells'])[0]))
ax.scatter(d.obsm['X_tsne'][loc_assoc, 0], d.obsm['X_tsne'][loc_assoc, 1],
           alpha=0.5, c=c[loc_assoc], cmap='seismic',
           vmin=-cutoff, vmax=cutoff, **pp.umapprops)
ax.set_title('CNA Associations')
ax.axis('off')

# --- Top right: all cells colored by the z-scored RAC1 gene set score
ax = ax3
c = geneset_scores.PID_RAC1_PATHWAY.values
c = (c - np.mean(c))
c = c / np.std(c)
cutoff = np.max([-np.percentile(c, 10), np.percentile(c, 90)])
ax.scatter(d.obsm['X_tsne'][:, 0], d.obsm['X_tsne'][:, 1],
           alpha=0.5, c=c, cmap='seismic',
           vmin=-cutoff, vmax=cutoff, **pp.umapprops)
ax.text(0.65, 0.9, '$R = {:.2f}$'.format(0.63),
        transform=ax.transAxes, fontsize=6, color="black")
ax.set_title('RAC1 Activation')
ax.axis('off')

# --- Middle row: score histograms for four clusters, each with its own color
colors = get_cmap("tab20_r").colors
for hist_ax, state, color_idx in ((ax4, "MS1", 11), (ax5, "MS4", 15),
                                  (ax6, "BS1", 5), (ax7, "TS2", 1)):
    plot_clust_hist(state, res.ncorrs, hist_ax, d, res.fdr_5p_t, colors[color_idx])

# --- Bottom row: the same four clusters on the tSNE, colored by z-scored
# --- per-cell score; the color scale is shared across the four panels
c = d.obs['ncorrs']
c = (c - np.mean(c))
c = c / np.std(c)
cutoff = np.max([-np.percentile(c, 10), np.percentile(c, 90)])

# (axes, cluster, x limits, y limits); None leaves the y limits untouched
zoom_panels = (
    (ax8, "MS1", (-78, 20), None),
    (ax9, "MS4", (-80, 40), None),
    (ax10, "BS1", (15, 65), (-68, -32)),
    (ax11, "TS2", (-3, 85), (-43, 50)),
)
for ax, state, x_limits, y_limits in zoom_panels:
    ax.axis('off')
    i_cluster = d.obs['cell_state'] == state
    ax.set_xlim(*x_limits)
    if y_limits is not None:
        ax.set_ylim(*y_limits)
    ax.scatter(d.obsm['X_tsne'][i_cluster, 0], d.obsm['X_tsne'][i_cluster, 1],
               alpha=0.5, c=c[i_cluster],
               cmap="seismic", vmin=-cutoff, vmax=cutoff, **pp.umapprops)

# --- Top left is left blank in this figure
ax = ax1
ax.axis('off')

plt.tight_layout()
plt.savefig('../_figs/rawmainfig.sepsis.pdf')

In [None]:
# Directory scanpy uses for figures written through the `save=` argument of its plotting calls
sc.settings.figdir = '../_figs/'

In [None]:
fig = plt.figure(figsize=(6, 4))
gs = fig.add_gridspec(nrows=9, ncols=12,
                      wspace=0.5, hspace=1)

# Same grid as the main figure: 3 panels on top, then two rows of 4 panels
ax1, ax2, ax3 = (fig.add_subplot(gs[0:4, left:left + 4]) for left in (0, 4, 8))
ax4, ax5, ax6, ax7 = (fig.add_subplot(gs[4:6, left:left + 3]) for left in (0, 3, 6, 9))
ax8, ax9, ax10, ax11 = (fig.add_subplot(gs[6:9, left:left + 3]) for left in (0, 3, 6, 9))

# The top-middle panel is blanked out
ax2.axis("off")

# The top-left panel holds the tSNE colored by cell state; scanpy also saves
# the plot under the given file name
ax = ax1
ax.axis('off')
sc.pl.tsne(d, color=['cell_state'], legend_loc='on data',
           legend_fontsize=5, title="Clusters", ax=ax, palette='tab20_r',
           save="rawmainfig.sepsis_tsne.pdf")

plt.tight_layout()

## Read in results

In [None]:
# Set scanpy's logging verbosity, print the scanpy header, and set the figure defaults
sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(dpi=100, facecolor='white', figsize = (6, 6), fontsize = 16)

In [None]:
# Load a previously saved AnnData object, replacing the in-memory `d`
d = sc.read_h5ad("/data/srlab/jkang/cna_sepsis/9_Sepsis_CNA/d_anysepsis_author_preprocessing.h5ad")

In [None]:
## Read in results
pickle_off = open("/data/srlab/jkang/cna_sepsis/9_Sepsis_CNA/res_anysepsis_author_preprocessing.pickle", 'rb')
res = pickle.load(pickle_off)

In [None]:
## Save results for R analysis
d.obs['tSNE1'] = d.obsm['X_tsne'][:,0]
d.obs['tSNE2'] = d.obsm['X_tsne'][:,1]

In [None]:
# Export the selected per-cell columns (sample id, phenotype, cell state, score,
# association flags, tSNE coordinates) to CSV
dummy_df = pd.DataFrame(d.obs.loc[:,["id", "pheno", "cell_state", "ncorrs",
                                     "negcells", "poscells", "tSNE1", "tSNE2"]])
dummy_df.to_csv("/data/srlab/lrumker/MCSC_Project/mcsc_scratch/sepsisres.csv")

### Optional:  Re-UMAP Cells within Clusters

In [None]:
## Independently run UMAP and color by per-cell score
ms1 = d[d.obs['cell_state'] == 'MS1', :]
np.random.seed(0) # for reproducibility
print('computing and storing nearest neighbor graph')
sc.pp.neighbors(ms1) # default parameters
print('computing and storing umap')
sc.tl.umap(ms1)

## Independently run UMAP and color by per-cell score
ms4 = d[d.obs['cell_state'] == 'MS4', :]
np.random.seed(0) # for reproducibility
print('computing and storing nearest neighbor graph')
sc.pp.neighbors(ms4) # default parameters
print('computing and storing umap')
sc.tl.umap(ms4)

In [None]:
## Independently run UMAP and color by per-cell score
ms4 = d[d.obs['cell_state'] == 'MS4', :]
np.random.seed(0) # for reproducibility
print('computing and storing nearest neighbor graph')
sc.pp.neighbors(ms4) # default parameters
print('computing and storing umap')
sc.tl.umap(ms4)

In [None]:
## Independently run UMAP and color by per-cell score
bs1 = d[d.obs['cell_state'] == 'BS1', :]
np.random.seed(0) # for reproducibility
print('computing and storing nearest neighbor graph')
sc.pp.neighbors(bs1) # default parameters
print('computing and storing umap')
sc.tl.umap(bs1)

In [None]:
## Independently run UMAP and color by per-cell score
ts2 = d[d.obs['cell_state'] == 'TS2', :]
np.random.seed(0) # for reproducibility
print('computing and storing nearest neighbor graph')
sc.pp.neighbors(ts2) # default parameters
print('computing and storing umap')
sc.tl.umap(ts2)