In [2]:
import anndata
import matplotlib.colors as clr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc

from mcDETECT.utils import *
from mcDETECT.model import *

import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides pandas deprecation / SettingWithCopy warnings
# raised by later cells, so real problems can go unnoticed.
warnings.filterwarnings("ignore")
# Set scanpy's logging verbosity to its lowest level (0).
sc.settings.verbosity = 0

In [None]:
# Dataset selection and the input/output directories derived from it
dataset = "MERSCOPE_WT_1"
data_path = f"../data/{dataset}/"
output_path = f"../output/{dataset}/"

# Orientation settings used later to rotate / flip granule coordinates:
#   cut                     -> whether to flip one axis around `cutoff`
#   cutoff                  -> value the flipped coordinate is subtracted from
#   theta                   -> rotation angle in radians (10 degrees)
#   coordinate_for_rotation -> the two columns that are rotated together
#   coordinate_for_cutoff   -> base name of the coordinate that is flipped
# Datasets without their own entry fall back to the same values.
orientation_by_dataset = {
    "MERSCOPE_WT_1": (True, 6250, 10 * np.pi / 180, ["sphere_y", "sphere_x"], "global_y"),
}
fallback_orientation = (True, 6250, 10 * np.pi / 180, ["sphere_y", "sphere_x"], "global_y")

(cut, cutoff, theta,
 coordinate_for_rotation, coordinate_for_cutoff) = orientation_by_dataset.get(dataset, fallback_orientation)

In [6]:
# Colors
# Discrete palette (19 hex colours) passed as `palette` to the categorical scatter plots.
color_dct = [
    "#F56867", "#FEB915", "#C798EE", "#59BE86", "#7495D3",
    "#6D1A9C", "#15821E", "#3A84E6", "#997273", "#787878",
    "#DB4C6C", "#9E7A7A", "#554236", "#AF5F3C", "#93796C",
    "#F9BD3F", "#DAB370", "#877F6C", "#268785",
]

# Continuous colormap registered under the name "magma", interpolated over 256 levels
# between the six anchor colours below.
magma_anchor_colors = ["#000003", "#3B0F6F", "#8C2980", "#F66E5B", "#FD9F6C", "#FBFCBF"]
color_cts = clr.LinearSegmentedColormap.from_list("magma", magma_anchor_colors, N=256)

In [7]:
# -------------------- Read data -------------------- #
# All inputs are read from the "processed_data" folder under `data_path`.

# Cells: cell-level AnnData
adata = sc.read_h5ad(data_path + "processed_data/adata.h5ad")

# Neurons: subset of cells annotated as Glutamatergic or GABAergic
neuron_mask = adata.obs["cell_type"].isin(["Glutamatergic", "GABAergic"])
adata_neuron = adata[neuron_mask].copy()

# Transcripts: transcript table
transcripts = pd.read_parquet(data_path + "processed_data/transcripts.parquet")

# Genes: first column of genes.csv as a plain list
genes = list(pd.read_csv(data_path + "processed_data/genes.csv").iloc[:, 0])

# Negative control markers: the "Gene" column as a plain list
nc_genes = list(pd.read_csv(data_path + "processed_data/negative_controls.csv")["Gene"])

# Spots: spot-level AnnData (obs carries global_x, global_y and brain_area)
spots = sc.read_h5ad(data_path + "processed_data/spots.h5ad")

In [8]:
# Scatter plot of all cells coloured by cell type, saved as cell_type.png under output_path.
sc.set_figure_params(scanpy = True, figsize = (6, 9))
ax = sc.pl.scatter(
    adata,
    x = "global_y_new",
    y = "global_x_new",
    color = "cell_type",
    palette = color_dct,
    alpha = 1,
    size = 5,
    title = " ",
    show = False,
)

# Strip every decoration so only the points remain
ax.grid(False)
ax.set(xticks = [], yticks = [], xlabel = "", ylabel = "")
for side in ax.spines:
    ax.spines[side].set_visible(False)

plt.savefig(output_path + "cell_type.png", dpi = 300, bbox_inches = "tight")
plt.close()

In [9]:
# Markers
# Genes passed as `gnl_genes` to mcDETECT in the detection cells.
syn_genes = [
    "Camk2a", "Cplx2", "Slc17a7", "Ddn", "Syp",
    "Map1a", "Shank1", "Syn1", "Gria1", "Gria2",
    "Cyfip2", "Vamp2", "Bsn", "Slc32a1", "Nfasc",
    "Syt1", "Tubb3", "Nav1", "Shank3", "Mapt",
]
len(syn_genes)

20

In [None]:
# ==================== Rough detection (run once) ==================== #
# Filters are effectively switched off in this pass:
#   - size filtering:             size_thr = 1e5
#   - in-soma filtering:          in_soma_thr = (1.01, 0.01)
#   - negative control filtering: nc_genes = None
# The merged result is written to all_granules.parquet under output_path.

rough_params = dict(
    type = "MERSCOPE",
    transcripts = transcripts,
    gnl_genes = syn_genes,
    nc_genes = None,
    eps = 1.5,
    minspl = 3,
    grid_len = 1,
    cutoff_prob = 0.95,
    alpha = 10,
    low_bound = 3,
    size_thr = 1e5,
    in_soma_thr = (1.01, 0.01),
    l = 1,
    rho = 0.1,
    s = 1,
    nc_top = 15,
    nc_thr = 0.1,
)
mc = mcDETECT(**rough_params)

# Only the second element returned by dbscan is used; it is merged afterwards
_, data_low, _ = mc.dbscan(record_cell_id = True)
print("Merging spheres...")
granules = mc.merge_sphere(data_low)
granules.to_parquet(output_path + "all_granules.parquet")

In [10]:
# ==================== Fine detection (run once) ==================== #
# All three filters are active in this pass:
#   - size filtering:             size_thr = 4.0
#   - in-soma filtering:          in_soma_thr = (0.1, 0.9)
#   - negative control filtering: nc_genes = nc_genes
# NOTE: this rebinds both `mc` and `granules`.

fine_params = dict(
    type = "MERSCOPE",
    transcripts = transcripts,
    gnl_genes = syn_genes,
    nc_genes = nc_genes,
    eps = 1.5,
    minspl = 3,
    grid_len = 1,
    cutoff_prob = 0.95,
    alpha = 10,
    low_bound = 3,
    size_thr = 4.0,
    in_soma_thr = (0.1, 0.9),
    l = 1,
    rho = 0.1,
    s = 1,
    nc_top = 15,
    nc_thr = 0.1,
)
mc = mcDETECT(**fine_params)

granules = mc.detect()

1 out of 20 genes processed!
2 out of 20 genes processed!
3 out of 20 genes processed!
4 out of 20 genes processed!
5 out of 20 genes processed!
6 out of 20 genes processed!
7 out of 20 genes processed!
8 out of 20 genes processed!
9 out of 20 genes processed!
10 out of 20 genes processed!
11 out of 20 genes processed!
12 out of 20 genes processed!
13 out of 20 genes processed!
14 out of 20 genes processed!
15 out of 20 genes processed!
16 out of 20 genes processed!
17 out of 20 genes processed!
18 out of 20 genes processed!
19 out of 20 genes processed!
20 out of 20 genes processed!
Merging spheres...
Negative control filtering...


In [None]:
# Assign region labels
# Each granule receives the brain_area of the spot located at the grid coordinates
# that `closest` returns for the granule's sphere centre (sphere_x, sphere_y).
labels_df = pd.DataFrame({"global_x": spots.obs["global_x"], "global_y": spots.obs["global_y"], "brain_area": spots.obs["brain_area"]})
x_grid, y_grid = list(np.unique(labels_df["global_x"])), list(np.unique(labels_df["global_y"]))

# (global_x, global_y) -> brain_area lookup, built once instead of masking every spot
# for every granule. setdefault keeps the FIRST spot when a coordinate pair repeats,
# which is what taking the first matching row did before.
area_by_xy = {}
for spot_x, spot_y, spot_area in zip(labels_df["global_x"], labels_df["global_y"], labels_df["brain_area"]):
    area_by_xy.setdefault((spot_x, spot_y), spot_area)

# Collect all labels first and assign the column in one step. Writing through
# granules["brain_area"].iloc[i] is chained assignment: it is not guaranteed to modify
# `granules` and does nothing when pandas copy-on-write is enabled.
# A granule whose nearest (x, y) pair is not an existing spot raises KeyError.
granules["brain_area"] = [
    area_by_xy[(closest(x_grid, center_x), closest(y_grid, center_y))]
    for center_x, center_y in zip(granules["sphere_x"].to_numpy(), granules["sphere_y"].to_numpy())
]

# Rotate the two coordinates named in `coordinate_for_rotation` by `theta`.
# The outputs are stored as "global_<axis>_new", where <axis> is the suffix of the
# source column name (e.g. "sphere_y" -> "global_y_new").
rotation_matrix = np.array([[np.cos(theta), np.sin(theta)], [-np.sin(theta), np.cos(theta)]])
coords = granules[coordinate_for_rotation].to_numpy()
transformed_coords = coords @ rotation_matrix.T
rotated_columns = ["global_" + source.split("_")[1] + "_new" for source in coordinate_for_rotation]
granules[rotated_columns[0]] = transformed_coords[:, 0]
granules[rotated_columns[1]] = transformed_coords[:, 1]

# Optionally flip the rotated coordinate around `cutoff`.
# `coordinate_for_cutoff` already carries the "global_" prefix, so the rotated column is
# coordinate_for_cutoff + "_new"; prepending "global_" again would name a column that
# does not exist.
if cut:
    flipped_column = coordinate_for_cutoff + "_new"
    granules[flipped_column] = cutoff - granules[flipped_column]

granules.head()

Unnamed: 0,sphere_x,sphere_y,sphere_z,layer_z,sphere_r,size,comp,in_soma_ratio,gene,nc_ratio,brain_area,global_y_new,global_x_new
0,841.70602,2111.386,0.0,0.0,1.099598,5.0,2.0,0.0,Camk2a,0.0,FT,4024.529981,462.280283
1,839.2096,2117.4731,0.0,0.0,1.160026,7.0,3.0,0.0,Camk2a,0.0,FT,4018.968857,458.764775
2,839.05195,2129.9065,0.0,0.0,0.684313,3.0,1.0,0.0,Camk2a,0.0,FT,4006.751724,456.450483
3,842.6394,2129.926,0.0,0.0,1.56093,7.0,1.0,0.0,Camk2a,0.0,FT,4006.109566,459.980046
4,832.27235,2146.3137,0.0,0.0,0.829829,4.0,2.0,0.0,Camk2a,0.0,FT,3991.771051,446.9248


In [13]:
# Save granules
# Writes the current `granules` table (including the brain_area and rotated
# coordinate columns added above) to granules.parquet under output_path.
granules.to_parquet(output_path + "granules.parquet")

In [23]:
# Read granules
# Reloads the table written by the save cell, rebinding `granules`, so the cells
# below can run without repeating detection. The last line displays (rows, columns).
granules = pd.read_parquet(output_path + "granules.parquet")
granules.shape

(697780, 13)

In [None]:
# Spot-level neuron and granule metadata
# Output file names are derived from `dataset` so that switching datasets does not
# overwrite the files of another sample.
comparison_dir = "../WT_AD_comparison/"

# Spot-level AnnData built from the neurons (obs["neuron_count"] is read from it below)
spot_neuron_all = spot_neuron(adata_neuron, spots)
spot_neuron_all.write_h5ad(f"{comparison_dir}{dataset}_neurons.h5ad")

# Spot-level AnnData built from the granules (obs["gnl_count"] and obs["brain_area"] are read from it below)
spot_granule_all = spot_granule(granule = granules, spot = spots)
spot_granule_all.write_h5ad(f"{comparison_dir}{dataset}_granules.h5ad")

In [None]:
# Scatter plot of the spots coloured by brain area, shown inline with a thick frame.
sc.set_figure_params(scanpy = True, figsize = (6, 9))
ax = sc.pl.scatter(
    spot_granule_all,
    x = "global_y_new",
    y = "global_x_new",
    color = "brain_area",
    palette = color_dct,
    alpha = 1,
    size = 40,
    title = " ",
    show = False,
)

# No grid, ticks or axis labels; the frame is kept and drawn thicker
ax.grid(False)
ax.set(xticks = [], yticks = [], xlabel = "", ylabel = "")
frame_width = 1.5
for side in ax.spines:
    ax.spines[side].set_linewidth(frame_width)
plt.show()

In [None]:
# Mean granule count and mean neuron count per spot for each brain area.
granule_array = np.array(spot_granule_all.obs["gnl_count"])
neuron_array = np.array(spot_neuron_all.obs["neuron_count"])
area_array = np.array(spot_granule_all.obs["brain_area"])

# The area labels come from the granule object but are also used to mask the neuron
# counts, so both objects must describe the same spots.
# NOTE(review): this assumes the two objects list the spots in the same order; confirm.
if len(neuron_array) != len(area_array):
    raise ValueError("spot_neuron_all and spot_granule_all must contain the same spots")

granule_density, neuron_density = [], []

area_list = ["Isocortex", "OLF", "HPF-CA", "HPF-DG", "HPF-SR", "CTXsp", "TH", "MB", "FT"]
for area in area_list:
    in_area = area_array == area
    granule_temp = granule_array[in_area]
    neuron_temp = neuron_array[in_area]
    # total count divided by the number of spots in the area
    granule_density.append(np.sum(granule_temp) / len(granule_temp))
    neuron_density.append(np.sum(neuron_temp) / len(neuron_temp))

density_comparison = pd.DataFrame({"area_list": area_list, "granule_density": granule_density, "neuron_density": neuron_density})
# File name follows `dataset` instead of a hard-coded sample name; the index is not written
density_comparison.to_csv(f"../WT_AD_comparison/{dataset}_granule_neuron_density.csv", index = False)

In [None]:
# Plot granules
# Wrap the granule table in an AnnData so sc.pl.scatter can be used; X is an all-zero
# placeholder with the same shape as the table and only `obs` carries information.
placeholder_X = np.zeros(granules.shape)
granule_adata = anndata.AnnData(X = placeholder_X, obs = granules)

# Fix the category order of brain_area
brain_area_order = ["CTXsp", "FT", "HPF-CA", "HPF-DG", "HPF-SR", "Isocortex", "MB", "OLF", "TH"]
granule_adata.obs["brain_area"] = pd.Categorical(granule_adata.obs["brain_area"], categories = brain_area_order, ordered = True)

# Optional flip of global_y_new around a cutoff (currently disabled):
# cutoff = 6250
# granule_adata.obs["global_y_new"] = cutoff - granule_adata.obs["global_y_new"]

sc.set_figure_params(scanpy = True, figsize = (6, 9))
ax = sc.pl.scatter(
    granule_adata,
    x = "global_y_new",
    y = "global_x_new",
    color = "brain_area",
    palette = color_dct,
    alpha = 1,
    size = 1,
    title = " ",
    show = False,
)
ax.grid(False)
ax.set(xticks = [], yticks = [], xlabel = "", ylabel = "")
for side in ax.spines:
    ax.spines[side].set_visible(False)

# NOTE(review): saved under the notebook-relative "output/" folder rather than
# `output_path` -- confirm this is intended and that the folder exists.
plt.savefig("output/all_granules.png", dpi = 300, bbox_inches = "tight")
plt.close()

In [None]:
# # Plot granules (downsampled)
# n = int(granule_adata.shape[0] / 20)
# np.random.seed(42)
# idx = np.random.choice(granule_adata.shape[0], n, replace=False)
# granule_subset = granule_adata[idx, :]

# ax = sc.pl.scatter(granule_subset, alpha=1, x = "global_y_new", y = "global_x_new", color="brain_area", palette=color_dct, size=20, title=" ", show=False)
# ax.grid(False)
# ax.set_xticks([])
# ax.set_yticks([])
# ax.set_xlabel("")
# ax.set_ylabel("")
# for spine in ax.spines.values():
#     spine.set_visible(False)
# for coll in ax.collections:
#     coll.set_edgecolor("black")
#     coll.set_linewidth(0.1)
# plt.savefig(f"output/all_granules_downsample_{n}.jpeg", dpi=500, bbox_inches="tight")
# plt.close()

In [None]:
# Granule expression profile
# Calls mc.profile on the granule table for every gene in `genes`.
# NOTE: this rebinds `granule_adata` (previously the plotting-only AnnData) and needs
# the `mc` object created in a detection cell, so that cell must have been run in
# this kernel even when `granules` was reloaded from parquet.
granule_adata = mc.profile(granules, genes = genes)
granule_adata

In [None]:
# Normalization
# Scale each granule to a total of 1e4 and then apply log1p.
# Both calls modify `granule_adata` in place, so re-running this cell without
# re-running the profiling cell applies the transformation a second time.
sc.pp.normalize_total(granule_adata, target_sum=1e4)
sc.pp.log1p(granule_adata)

In [None]:
# Granule vs neuron expression (nuclei)
# Shuffle the granule order with a fixed seed (42) so the permutation is reproducible,
# and keep the shuffled granules as an independent copy.
np.random.seed(42)
permuted_indices = np.random.permutation(granule_adata.n_obs)
granule_adata_permutated = granule_adata[permuted_indices].copy()

In [None]:
# Stack the shuffled granules on top of the neurons (observations axis).
# NOTE(review): the granule matrix was normalised and log1p-transformed above; confirm
# that adata_neuron is on the same scale before comparing the two groups.
adata_all = anndata.concat([granule_adata_permutated, adata_neuron], axis = 0, merge = "same")
adata_all.var["genes"] = adata_all.var.index

# Group label per observation, in concatenation order: granules first, then neurons
n_granules = granule_adata_permutated.shape[0]
n_neurons = adata_neuron.shape[0]
type_labels = ["Granules"] * n_granules + ["Neurons"] * n_neurons
adata_all.obs["type"] = pd.Categorical(type_labels, categories = ["Granules", "Neurons"], ordered = True)
adata_all

In [None]:
# Rank genes between the groups in obs["type"] (Granules vs Neurons) with the Wilcoxon
# method; results are stored in adata_all.uns["rank_genes_groups"].
sc.tl.rank_genes_groups(adata_all, "type", method="wilcoxon")
# Ranked gene names, one column per group; show the top five rows
markers = pd.DataFrame(adata_all.uns["rank_genes_groups"]["names"])
markers.head(5)

In [None]:
# Pull the per-group result tables out of the rank_genes_groups output
rank_results = adata_all.uns["rank_genes_groups"]
names = pd.DataFrame(rank_results["names"])
logfc = pd.DataFrame(rank_results["logfoldchanges"])
pvals = pd.DataFrame(rank_results["pvals"])

markers = []

# For the "Neurons" group: keep genes with logfc >= 1.5 and p-value <= 0.05
# (the unadjusted "pvals" field), order them by decreasing logfc and print the top 15.
# `df` is read again by the last cell of the notebook.
for group in ["Neurons"]:
    df = pd.DataFrame({"names": names[group], "logfc": logfc[group], "pvals": pvals[group]})
    passes_thresholds = (df["logfc"] >= 1.5) & (df["pvals"] <= 0.05)
    df = df[passes_thresholds].sort_values(by = ["logfc"], ascending = False)
    print(df["names"].head(15).tolist())

In [None]:
# Heatmap of granule markers and neuron markers across the two observation groups.
gene_groups = {"Granule Markers": ["Camk2a", "Cplx2", "Slc17a7", "Syp", "Ddn", "Map1a", "Syn1", "Shank1", "Cyfip2", "Vamp2"],
               "Neuron Markers": ["Trpc4", "Gjc3", "Plekhb1", "Ntrk2", "Ntsr2", "Fn1", "Gnai2", "Shc3", "Reep3", "Chd9", "Acsbg1"]}
target_genes = [gene for group in gene_groups.values() for gene in group]

# Inclusive (start, end) index range of every group inside target_genes, derived from
# the group sizes so it stays correct when genes are added or removed
# (10 + 11 genes -> [(0, 9), (10, 20)]).
var_group_positions = []
group_start = 0
for group in gene_groups.values():
    var_group_positions.append((group_start, group_start + len(group) - 1))
    group_start += len(group)
var_group_labels = list(gene_groups.keys())

sc.set_figure_params(scanpy=True, fontsize=10)
# The group positions/labels are passed on so the two marker sets are annotated in the
# plot; they were previously computed but never used.
fig = sc.pl.heatmap(adata_all, target_genes, groupby = "type", log = True, cmap = "Reds", standard_scale = "var", swap_axes = True, figsize = (10, 4),
                    var_group_positions = var_group_positions, var_group_labels = var_group_labels, show = False)
# NOTE(review): saved under the notebook-relative "output/" folder rather than
# `output_path` -- confirm this is intended.
plt.savefig("output/granule_neuron_heatmap.jpeg", dpi = 300, bbox_inches = "tight")
plt.close()

In [None]:
# Top 20 genes of `df` (the filtered, logfc-sorted Neurons table left behind by the
# marker-filtering cell) that are not part of syn_genes.
print([i for i in df["names"].head(20).tolist() if i not in syn_genes])