# Secondary analysis of 10x Visium data post-alignment

In [None]:
# Clusters with scVI 
import os
import scvi
import scanpy as sc
import anndata as ad

# Load the sagittal Visium AnnData. The working directory is switched to the
# data folder first; the read itself uses an absolute path.
os.chdir('/Users/amanda.mitchell/Dropbox/10x_visium_paper_2024/data/expression-data/sagittal')
input_h5ad = '/Users/amanda.mitchell/Dropbox/10x_visium_paper_2024/data/expression-data/sagittal/sagittal_vis.h5ad'
adata = ad.read_h5ad(input_h5ad)

# Quality filtering: drop observations with fewer than 200 detected genes,
# then drop genes detected in fewer than 3 of the remaining observations.
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)

# Keep a copy of X as it is before normalization; the scVI setup further
# down in this cell reads this "counts" layer.
adata.layers["counts"] = adata.X.copy()

# Scale every observation to a total of 1e4, then log1p-transform.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Snapshot the log-normalized, all-gene matrix in .raw before the object is
# subset to the 600 most variable genes.
adata.raw = adata
sc.pp.highly_variable_genes(adata, n_top_genes=600, subset=True)

# 50-component PCA on the HVG-subset matrix, then checkpoint to disk.
sc.pp.pca(adata, n_comps=50)
pca_checkpoint = '/Users/amanda.mitchell/Desktop/sagittal_vis_pca.h5ad'
adata.write_h5ad(pca_checkpoint)

# scVI embedding
# Register the AnnData with scVI: the model reads the "counts" layer and
# treats obs["sample"] as a categorical covariate.
scvi.model.SCVI.setup_anndata(
    adata,
    layer="counts",
    categorical_covariate_keys=["sample"],
)  # ,continuous_covariate_keys=["percent_mito", "percent_ribo"])

# Train with scvi-tools defaults.
model = scvi.model.SCVI(adata)
model.train()

# Store the latent representation and the model-normalized expression on the
# AnnData object.
latent = model.get_latent_representation()
adata.obsm["X_scVI"] = latent
# NOTE(review): 10e4 equals 1e5, while the scanpy normalization earlier in
# this cell uses target_sum=1e4 - confirm this scale is the intended one.
scvi_normalized = model.get_normalized_expression(library_size=10e4)
adata.layers["scvi_normalized"] = scvi_normalized

# Checkpoint the object with the scVI results attached.
scvi_checkpoint = '/Users/amanda.mitchell/Desktop/sagittal_vis_scvi.h5ad'
adata.write_h5ad(scvi_checkpoint)

# plot model metrics
# model.history()

# Neighbor graph and UMAP. The graph is currently built on the PCA
# embedding; the commented-out call is the scVI-latent alternative.
# sc.pp.neighbors(adata, use_rep="X_scVI")
sc.pp.neighbors(adata, use_rep="X_pca")
sc.tl.umap(adata, min_dist=0.3)
sc.pl.umap(adata, color=["WB_subclass"], frameon=False)

# Leiden clustering on that graph; labels are stored in obs["leiden_pca"].
leiden_key = "leiden_pca"
sc.tl.leiden(adata, key_added=leiden_key, resolution=0.8)
sc.pl.umap(adata, color=[leiden_key], frameon=False)
# sc.tl.leiden(adata, key_added="leiden_scVI", resolution=0.8)
# sc.pl.umap(adata, color=["leiden_scVI"], frameon=False)

# Checkpoint the clustered object.
analyzed_checkpoint = '/Users/amanda.mitchell/Desktop/sagittal_vis_analyzed.h5ad'
adata.write_h5ad(analyzed_checkpoint)

# differential expression
# The marker dot plot below is grouped by the Leiden clusters, so the scVI
# differential expression and the category list must use that same grouping.
# (Previously DE was grouped by "WB_subclass" and the categories were read
# from obs["cell_type"], so the "<cluster> vs Rest" lookups could not line up
# with the plotted groups.)
cluster_key = "leiden_pca"
adata.obs[cluster_key].head()
de_df = model.differential_expression(groupby=cluster_key)
de_df.head()

# markers
# For each cluster keep the first 5 genes (in the order returned by scVI)
# with positive mean log fold change, Bayes factor > 3 and expressed in more
# than 10% of the cluster's observations.
markers = {}
for c in adata.obs[cluster_key].cat.categories:
    cid = f"{c} vs Rest"
    cluster_df = de_df.loc[de_df.comparison == cid]
    keep = (
        (cluster_df["lfc_mean"] > 0)
        & (cluster_df["bayes_factor"] > 3)
        & (cluster_df["non_zeros_proportion1"] > 0.1)
    )
    top_genes = cluster_df.index[keep].tolist()[:5]
    # Skip clusters with no surviving genes so the plot gets no empty groups.
    if top_genes:
        markers[c] = top_genes

sc.tl.dendrogram(adata, groupby=cluster_key, use_rep="X_pca")
# return_fig=True hands back the DotPlot object; without it there is nothing
# bound to the name `dotplot` for the savefig call below.
dotplot = sc.pl.dotplot(adata, markers, groupby=cluster_key, dendrogram=True,
    color_map="Blues", swap_axes=True, use_raw=True, standard_scale="var",
    return_fig=True)

dotplot.savefig("/Users/amanda.mitchell/Dropbox/10x_visium_paper_2024/figures/leiden_markers_dotplot.png", dpi=300, bbox_inches='tight')  # Save as PNG
   


In [None]:
# NMF modules with sklearn (a single NMF fit, not a consensus-NMF run)
from sklearn.decomposition import NMF
import pandas as pd
import numpy as np
import anndata as ad
import os

os.chdir('/Users/amanda.mitchell/Dropbox/10x_visium_paper_2024/data/expression-data/sagittal')
adata = ad.read_h5ad('/Users/amanda.mitchell/Dropbox/10x_visium_paper_2024/data/expression-data/sagittal/sagittal_vis.h5ad')
#sample_size = int(adata.n_obs * 0.01)
#random_indices = np.random.choice(adata.n_obs, size=sample_size, replace=False)
#adata_sample = adata[random_indices, :]
#counts = adata_sample.to_df()
matrix = adata.to_df()

# The NMF estimator gets its own name. Binding it to `model` would replace
# the scVI model from the clustering cell, which is still needed below for
# differential expression (sklearn's NMF has no differential_expression).
nmf_model = NMF(n_components=20, init='random', random_state=0)
W = pd.DataFrame(nmf_model.fit_transform(matrix)) # observations x components
H = pd.DataFrame(nmf_model.components_) # components x genes
W.to_csv('/Users/amanda.mitchell/Desktop/sagittal_vis_W.csv')
H.to_csv('/Users/amanda.mitchell/Desktop/sagittal_vis_H.csv')

# Module of an observation = component with the largest loading in W.
# Stored as an ordered-by-number categorical of string labels so that
# `.cat.categories` and groupby-based plotting work on it.
max_W = W.idxmax(axis=1)
max_W.index = adata.obs.index
module_labels = [str(k) for k in sorted(max_W.unique())]
adata.obs['module'] = pd.Categorical(max_W.astype(str), categories=module_labels)

# Program of a gene = component with the largest weight in H.
max_H = H.idxmax(axis=0)
max_H.index = adata.var.index
adata.var['program'] = max_H
adata.write_h5ad('/Users/amanda.mitchell/Desktop/sagittal_vis_cNMF.h5ad')


## module markers
# differential expression
adata.obs.module.head()
# NOTE(review): this assumes `model` is still the scVI model trained in the
# clustering cell (cells run top to bottom) - confirm. That model is bound to
# its own filtered, HVG-subset AnnData, so the module labels are copied onto
# that object by observation name before DE is run on it.
scvi_adata = model.adata
transferred = adata.obs['module'].reindex(scvi_adata.obs_names)
scvi_adata.obs['module'] = transferred.cat.remove_unused_categories()
de_df = model.differential_expression(groupby="module")
de_df.head()

# markers
# For each module keep the first 5 genes (in the order returned by scVI) with
# positive mean log fold change, Bayes factor > 3 and expressed in more than
# 10% of the module's observations.
markers = {}
for c in scvi_adata.obs['module'].cat.categories:
    cid = f"{c} vs Rest"
    module_df = de_df.loc[de_df.comparison == cid]
    keep = (
        (module_df["lfc_mean"] > 0)
        & (module_df["bayes_factor"] > 3)
        & (module_df["non_zeros_proportion1"] > 0.1)
    )
    top_genes = module_df.index[keep].tolist()[:5]
    # Skip modules with no surviving genes so the plot gets no empty groups.
    if top_genes:
        markers[c] = top_genes

#sc.tl.dendrogram(adata, groupby="module", use_rep="X_pca")
# NOTE(review): use_raw=True needs `adata.raw` to be present in the h5ad
# loaded above - confirm, or plot `scvi_adata` instead.
# return_fig=True hands back the DotPlot object; without it there is nothing
# bound to the name `dotplot` for the savefig call below.
dotplot = sc.pl.dotplot(adata, markers, groupby="module", dendrogram=False,
    color_map="Blues", swap_axes=True, use_raw=True, standard_scale="var",
    return_fig=True)

dotplot.savefig("/Users/amanda.mitchell/Dropbox/10x_visium_paper_2024/figures/module_markers_dotplot.png", dpi=300, bbox_inches='tight')  # Save as PNG



In [None]:
# integration with scVI

from sklearn.decomposition import NMF
import pandas as pd
import numpy as np
import anndata as ad
import scanpy as sc
import scvi
import os

os.chdir('/Users/amanda.mitchell/Dropbox/10x_visium_paper_2024/data/expression-data/sagittal')
adata1 = ad.read_h5ad('/Users/amanda.mitchell/Dropbox/10x_visium_paper_2024/data/expression-data/sagittal/sagittal_vis.h5ad')

# Two independent 1% subsamples of the same object, used as stand-in
# "datasets" for the integration run.
# NOTE(review): the draws are unseeded, so the subsamples differ between runs.
# Each subset is copied because .obs is written to below; slicing alone
# returns a view.
sample_size1 = int(adata1.n_obs * 0.01)
random_indices1 = np.random.choice(adata1.n_obs, size=sample_size1, replace=False)
adata1s = adata1[random_indices1, :].copy()

sample_size2 = int(adata1.n_obs * 0.01)
random_indices2 = np.random.choice(adata1.n_obs, size=sample_size2, replace=False)
adata2s = adata1[random_indices2, :].copy()

adata1s.obs['dataset']='1s'
adata2s.obs['dataset']='2s'
# Both subsamples come from the same object, so an observation can appear in
# each; suffixing obs names with the dataset key keeps the index unique.
adatas = ad.concat([adata1s, adata2s], keys=['1s', '2s'], index_unique='-')

# scVI with the subsample label as batch.
scvi.model.SCVI.setup_anndata(adatas, batch_key="dataset")
model = scvi.model.SCVI(adatas, n_layers=2, n_latent=30, gene_likelihood="nb")
model.train()
SCVI_LATENT_KEY = "X_scVI"
adatas.obsm[SCVI_LATENT_KEY] = model.get_latent_representation()
sc.pp.neighbors(adatas, use_rep=SCVI_LATENT_KEY)
sc.tl.leiden(adatas)

# The MDE coordinates must be stored in .obsm before they can be used as a
# plotting basis; computed the same way as for the scANVI latent space in the
# next cell.
SCVI_MDE_KEY = "X_scVI_MDE"
adatas.obsm[SCVI_MDE_KEY] = scvi.model.utils.mde(adatas.obsm[SCVI_LATENT_KEY], accelerator="cpu")
# return_fig=True hands back the figure; without it there is nothing bound to
# the name `embedding` for the savefig call below.
embedding = sc.pl.embedding(adatas, basis=SCVI_MDE_KEY, color=["dataset"], frameon=False, ncols=1,
    return_fig=True)
embedding.savefig("/Users/amanda.mitchell/Dropbox/10x_visium_paper_2024/figures/dataset_scvit.png", dpi=300)

# markers
# Differential expression has to come from the model trained in this cell,
# grouped by the Leiden labels just computed on `adatas`; a `de_df` left over
# from an earlier cell describes different groups.
de_df = model.differential_expression(groupby="leiden")

# For each cluster keep the first 3 genes (in the order returned by scVI)
# with positive mean log fold change, Bayes factor > 3 and expressed in more
# than 10% of the cluster's observations.
markers = {}
for c in adatas.obs.leiden.cat.categories:
    cid = f"{c} vs Rest"
    cell_type_df = de_df.loc[de_df.comparison == cid]
    keep = (
        (cell_type_df["lfc_mean"] > 0)
        & (cell_type_df["bayes_factor"] > 3)
        & (cell_type_df["non_zeros_proportion1"] > 0.1)
    )
    top_genes = cell_type_df.index[keep].tolist()[:3]
    # Skip clusters with no surviving genes so the plot gets no empty groups.
    if top_genes:
        markers[c] = top_genes

sc.tl.dendrogram(adatas, groupby="leiden", use_rep="X_scVI")
# NOTE(review): use_raw=True needs `adatas.raw`; nothing in this cell sets it,
# so it must come from the input file via concat - confirm.
sc.pl.dotplot(adatas, markers, groupby="leiden", dendrogram=True,
    color_map="Blues", swap_axes=True, use_raw=True, standard_scale="var")



In [None]:
# integration with scANVI

# Build a scANVI model from the existing scVI model, using obs["cell_type"]
# as labels and "Unknown" as the unlabeled category.
# NOTE(review): `model` was last bound to the SCVI model built on `adatas` in
# the previous cell, while `adata` was last bound in the NMF cell - confirm
# that `adata` (rather than `adatas`) is the intended object here.
# NOTE(review): no visible code creates obs["cell_type"]; confirm it exists
# in the input file.
scanvi_model = scvi.model.SCANVI.from_scvi_model(
    model,
    adata=adata,
    labels_key="cell_type",
    unlabeled_category="Unknown",
)
scanvi_model.train(max_epochs=20, n_samples_per_label=100)

# Store the scANVI latent space and its MDE coordinates in .obsm, then plot
# the MDE embedding coloured by cell type.
SCANVI_LATENT_KEY = "X_scANVI"
adata.obsm[SCANVI_LATENT_KEY] = scanvi_model.get_latent_representation(adata)
SCANVI_MDE_KEY = "X_scANVI_MDE"
adata.obsm[SCANVI_MDE_KEY] = scvi.model.utils.mde(adata.obsm[SCANVI_LATENT_KEY], accelerator="cpu")
sc.pl.embedding(adata, basis=SCANVI_MDE_KEY, color=["cell_type"], ncols=1, frameon=False)

# integration metrics
# NOTE(review): `Benchmarker` is not imported anywhere in the visible source
# (presumably scib_metrics.benchmark.Benchmarker - confirm and import it).
# NOTE(review): the batch column created in the previous cell is "dataset",
# not "batch", and it lives on `adatas`; "X_pca" and SCVI_LATENT_KEY are not
# written to this `adata` by any visible code - confirm these keys exist.
bm = Benchmarker(
    adata,
    batch_key="batch",
    label_key="cell_type",
    embedding_obsm_keys=["X_pca", SCVI_LATENT_KEY, SCANVI_LATENT_KEY],
    n_jobs=-1,
)
bm.benchmark()
bm.plot_results_table(min_max_scale=False)
df = bm.get_results(min_max_scale=False)
print(df)


# markers
# For each cell type keep the first 3 genes with positive mean log fold
# change, Bayes factor > 3 and non-zero proportion > 0.1 in the group.
# NOTE(review): `de_df` is not recomputed in this cell, so it holds whatever
# an earlier cell produced - confirm it contains "<cell_type> vs Rest" rows.
markers = {}
cats = adata.obs.cell_type.cat.categories
for i, c in enumerate(cats):
    cid = f"{c} vs Rest"
    cell_type_df = de_df.loc[de_df.comparison == cid]
    cell_type_df = cell_type_df[cell_type_df.lfc_mean > 0]
    cell_type_df = cell_type_df[cell_type_df["bayes_factor"] > 3]
    cell_type_df = cell_type_df[cell_type_df["non_zeros_proportion1"] > 0.1]
    markers[c] = cell_type_df.index.tolist()[:3]

# NOTE(review): markers are keyed by cell_type but the dendrogram and dot plot
# are grouped by "WB_subclass"; also "X_scVI" is not stored on this `adata`
# by any visible code, and `sc` is not imported in this cell - confirm.
sc.tl.dendrogram(adata, groupby="WB_subclass", use_rep="X_scVI")
sc.pl.dotplot(adata, markers, groupby="WB_subclass", dendrogram=True,
    color_map="Blues", swap_axes=True, use_raw=True, standard_scale="var")
   


In [None]:
# graphs of clusters and modules


import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# One-hot indicator tables: one row per observation, one column per category.
# NOTE(review): the NMF cell stores its labels in obs["module"]; no visible
# code creates obs["component"] - confirm this column name.
component_df = pd.get_dummies(adata.obs['component'])
WB_subclass_df = pd.get_dummies(adata.obs['WB_subclass'])
df = pd.concat([component_df, WB_subclass_df], axis=1)

# Keep the returned ClusterGrid: the axis-label calls below need it.
# The metric name was misspelled ('euclideam'), which is not a valid distance.
clustermap = sns.clustermap(component_df, cmap="viridis", figsize=(10,8), metric='euclidean', method="average", linewidths=0.5)

clustermap.ax_heatmap.set_title("Clustered Heatmap of Components vs Cells")
clustermap.ax_heatmap.set_xlabel("Components")
clustermap.ax_heatmap.set_ylabel("Cells")

# Save the clustered heatmap as a PNG file
clustered_filename = "/Users/amanda.mitchell/Dropbox/10x_visium_paper_2024/figures/clustered_modules_clusters.png"
plt.savefig(clustered_filename)
